1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12 
13 #include <assert.h>
14 #include <stdbool.h>
15 
16 #define MAXLEVEL 200    /* Max parentheses level */
17 
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23 
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27     assert(PyUnicode_Check(name));
28     static const char * const forbidden[] = {
29         "None",
30         "True",
31         "False",
32         NULL
33     };
34     for (int i = 0; forbidden[i] != NULL; i++) {
35         if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36             PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37             return 0;
38         }
39     }
40     return 1;
41 }
42 
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46     Py_ssize_t i;
47     if (!asdl_seq_LEN(gens)) {
48         PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49         return 0;
50     }
51     for (i = 0; i < asdl_seq_LEN(gens); i++) {
52         comprehension_ty comp = asdl_seq_GET(gens, i);
53         if (!validate_expr(comp->target, Store) ||
54             !validate_expr(comp->iter, Load) ||
55             !validate_exprs(comp->ifs, Load, 0))
56             return 0;
57     }
58     return 1;
59 }
60 
61 static int
validate_slice(slice_ty slice)62 validate_slice(slice_ty slice)
63 {
64     switch (slice->kind) {
65     case Slice_kind:
66         return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
67             (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
68             (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
69     case ExtSlice_kind: {
70         Py_ssize_t i;
71         if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
72             return 0;
73         for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
74             if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
75                 return 0;
76         return 1;
77     }
78     case Index_kind:
79         return validate_expr(slice->v.Index.value, Load);
80     default:
81         PyErr_SetString(PyExc_SystemError, "unknown slice node");
82         return 0;
83     }
84 }
85 
86 static int
validate_keywords(asdl_seq * keywords)87 validate_keywords(asdl_seq *keywords)
88 {
89     Py_ssize_t i;
90     for (i = 0; i < asdl_seq_LEN(keywords); i++)
91         if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
92             return 0;
93     return 1;
94 }
95 
96 static int
validate_args(asdl_seq * args)97 validate_args(asdl_seq *args)
98 {
99     Py_ssize_t i;
100     for (i = 0; i < asdl_seq_LEN(args); i++) {
101         arg_ty arg = asdl_seq_GET(args, i);
102         if (arg->annotation && !validate_expr(arg->annotation, Load))
103             return 0;
104     }
105     return 1;
106 }
107 
108 static const char *
expr_context_name(expr_context_ty ctx)109 expr_context_name(expr_context_ty ctx)
110 {
111     switch (ctx) {
112     case Load:
113         return "Load";
114     case Store:
115         return "Store";
116     case Del:
117         return "Del";
118     case AugLoad:
119         return "AugLoad";
120     case AugStore:
121         return "AugStore";
122     case Param:
123         return "Param";
124     default:
125         Py_UNREACHABLE();
126     }
127 }
128 
129 static int
validate_arguments(arguments_ty args)130 validate_arguments(arguments_ty args)
131 {
132     if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
133         return 0;
134     }
135     if (args->vararg && args->vararg->annotation
136         && !validate_expr(args->vararg->annotation, Load)) {
137             return 0;
138     }
139     if (!validate_args(args->kwonlyargs))
140         return 0;
141     if (args->kwarg && args->kwarg->annotation
142         && !validate_expr(args->kwarg->annotation, Load)) {
143             return 0;
144     }
145     if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
146         PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
147         return 0;
148     }
149     if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
150         PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
151                         "kw_defaults on arguments");
152         return 0;
153     }
154     return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
155 }
156 
157 static int
validate_constant(PyObject * value)158 validate_constant(PyObject *value)
159 {
160     if (value == Py_None || value == Py_Ellipsis)
161         return 1;
162 
163     if (PyLong_CheckExact(value)
164             || PyFloat_CheckExact(value)
165             || PyComplex_CheckExact(value)
166             || PyBool_Check(value)
167             || PyUnicode_CheckExact(value)
168             || PyBytes_CheckExact(value))
169         return 1;
170 
171     if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
172         PyObject *it;
173 
174         it = PyObject_GetIter(value);
175         if (it == NULL)
176             return 0;
177 
178         while (1) {
179             PyObject *item = PyIter_Next(it);
180             if (item == NULL) {
181                 if (PyErr_Occurred()) {
182                     Py_DECREF(it);
183                     return 0;
184                 }
185                 break;
186             }
187 
188             if (!validate_constant(item)) {
189                 Py_DECREF(it);
190                 Py_DECREF(item);
191                 return 0;
192             }
193             Py_DECREF(item);
194         }
195 
196         Py_DECREF(it);
197         return 1;
198     }
199 
200     return 0;
201 }
202 
203 static int
validate_expr(expr_ty exp,expr_context_ty ctx)204 validate_expr(expr_ty exp, expr_context_ty ctx)
205 {
206     int check_ctx = 1;
207     expr_context_ty actual_ctx;
208 
209     /* First check expression context. */
210     switch (exp->kind) {
211     case Attribute_kind:
212         actual_ctx = exp->v.Attribute.ctx;
213         break;
214     case Subscript_kind:
215         actual_ctx = exp->v.Subscript.ctx;
216         break;
217     case Starred_kind:
218         actual_ctx = exp->v.Starred.ctx;
219         break;
220     case Name_kind:
221         if (!validate_name(exp->v.Name.id)) {
222             return 0;
223         }
224         actual_ctx = exp->v.Name.ctx;
225         break;
226     case List_kind:
227         actual_ctx = exp->v.List.ctx;
228         break;
229     case Tuple_kind:
230         actual_ctx = exp->v.Tuple.ctx;
231         break;
232     default:
233         if (ctx != Load) {
234             PyErr_Format(PyExc_ValueError, "expression which can't be "
235                          "assigned to in %s context", expr_context_name(ctx));
236             return 0;
237         }
238         check_ctx = 0;
239         /* set actual_ctx to prevent gcc warning */
240         actual_ctx = 0;
241     }
242     if (check_ctx && actual_ctx != ctx) {
243         PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
244                      expr_context_name(ctx), expr_context_name(actual_ctx));
245         return 0;
246     }
247 
248     /* Now validate expression. */
249     switch (exp->kind) {
250     case BoolOp_kind:
251         if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
252             PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
253             return 0;
254         }
255         return validate_exprs(exp->v.BoolOp.values, Load, 0);
256     case BinOp_kind:
257         return validate_expr(exp->v.BinOp.left, Load) &&
258             validate_expr(exp->v.BinOp.right, Load);
259     case UnaryOp_kind:
260         return validate_expr(exp->v.UnaryOp.operand, Load);
261     case Lambda_kind:
262         return validate_arguments(exp->v.Lambda.args) &&
263             validate_expr(exp->v.Lambda.body, Load);
264     case IfExp_kind:
265         return validate_expr(exp->v.IfExp.test, Load) &&
266             validate_expr(exp->v.IfExp.body, Load) &&
267             validate_expr(exp->v.IfExp.orelse, Load);
268     case Dict_kind:
269         if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
270             PyErr_SetString(PyExc_ValueError,
271                             "Dict doesn't have the same number of keys as values");
272             return 0;
273         }
274         /* null_ok=1 for keys expressions to allow dict unpacking to work in
275            dict literals, i.e. ``{**{a:b}}`` */
276         return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
277             validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
278     case Set_kind:
279         return validate_exprs(exp->v.Set.elts, Load, 0);
280 #define COMP(NAME) \
281         case NAME ## _kind: \
282             return validate_comprehension(exp->v.NAME.generators) && \
283                 validate_expr(exp->v.NAME.elt, Load);
284     COMP(ListComp)
285     COMP(SetComp)
286     COMP(GeneratorExp)
287 #undef COMP
288     case DictComp_kind:
289         return validate_comprehension(exp->v.DictComp.generators) &&
290             validate_expr(exp->v.DictComp.key, Load) &&
291             validate_expr(exp->v.DictComp.value, Load);
292     case Yield_kind:
293         return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
294     case YieldFrom_kind:
295         return validate_expr(exp->v.YieldFrom.value, Load);
296     case Await_kind:
297         return validate_expr(exp->v.Await.value, Load);
298     case Compare_kind:
299         if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
300             PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
301             return 0;
302         }
303         if (asdl_seq_LEN(exp->v.Compare.comparators) !=
304             asdl_seq_LEN(exp->v.Compare.ops)) {
305             PyErr_SetString(PyExc_ValueError, "Compare has a different number "
306                             "of comparators and operands");
307             return 0;
308         }
309         return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
310             validate_expr(exp->v.Compare.left, Load);
311     case Call_kind:
312         return validate_expr(exp->v.Call.func, Load) &&
313             validate_exprs(exp->v.Call.args, Load, 0) &&
314             validate_keywords(exp->v.Call.keywords);
315     case Constant_kind:
316         if (!validate_constant(exp->v.Constant.value)) {
317             PyErr_Format(PyExc_TypeError,
318                          "got an invalid type in Constant: %s",
319                          Py_TYPE(exp->v.Constant.value)->tp_name);
320             return 0;
321         }
322         return 1;
323     case JoinedStr_kind:
324         return validate_exprs(exp->v.JoinedStr.values, Load, 0);
325     case FormattedValue_kind:
326         if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
327             return 0;
328         if (exp->v.FormattedValue.format_spec)
329             return validate_expr(exp->v.FormattedValue.format_spec, Load);
330         return 1;
331     case Attribute_kind:
332         return validate_expr(exp->v.Attribute.value, Load);
333     case Subscript_kind:
334         return validate_slice(exp->v.Subscript.slice) &&
335             validate_expr(exp->v.Subscript.value, Load);
336     case Starred_kind:
337         return validate_expr(exp->v.Starred.value, ctx);
338     case List_kind:
339         return validate_exprs(exp->v.List.elts, ctx, 0);
340     case Tuple_kind:
341         return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342     case NamedExpr_kind:
343         return validate_expr(exp->v.NamedExpr.value, Load);
344     /* This last case doesn't have any checking. */
345     case Name_kind:
346         return 1;
347     }
348     PyErr_SetString(PyExc_SystemError, "unexpected expression");
349     return 0;
350 }
351 
352 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)353 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
354 {
355     if (asdl_seq_LEN(seq))
356         return 1;
357     PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
358     return 0;
359 }
360 
361 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)362 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
363 {
364     return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
365         validate_exprs(targets, ctx, 0);
366 }
367 
368 static int
validate_body(asdl_seq * body,const char * owner)369 validate_body(asdl_seq *body, const char *owner)
370 {
371     return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
372 }
373 
374 static int
validate_stmt(stmt_ty stmt)375 validate_stmt(stmt_ty stmt)
376 {
377     Py_ssize_t i;
378     switch (stmt->kind) {
379     case FunctionDef_kind:
380         return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
381             validate_arguments(stmt->v.FunctionDef.args) &&
382             validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
383             (!stmt->v.FunctionDef.returns ||
384              validate_expr(stmt->v.FunctionDef.returns, Load));
385     case ClassDef_kind:
386         return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
387             validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
388             validate_keywords(stmt->v.ClassDef.keywords) &&
389             validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
390     case Return_kind:
391         return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
392     case Delete_kind:
393         return validate_assignlist(stmt->v.Delete.targets, Del);
394     case Assign_kind:
395         return validate_assignlist(stmt->v.Assign.targets, Store) &&
396             validate_expr(stmt->v.Assign.value, Load);
397     case AugAssign_kind:
398         return validate_expr(stmt->v.AugAssign.target, Store) &&
399             validate_expr(stmt->v.AugAssign.value, Load);
400     case AnnAssign_kind:
401         if (stmt->v.AnnAssign.target->kind != Name_kind &&
402             stmt->v.AnnAssign.simple) {
403             PyErr_SetString(PyExc_TypeError,
404                             "AnnAssign with simple non-Name target");
405             return 0;
406         }
407         return validate_expr(stmt->v.AnnAssign.target, Store) &&
408                (!stmt->v.AnnAssign.value ||
409                 validate_expr(stmt->v.AnnAssign.value, Load)) &&
410                validate_expr(stmt->v.AnnAssign.annotation, Load);
411     case For_kind:
412         return validate_expr(stmt->v.For.target, Store) &&
413             validate_expr(stmt->v.For.iter, Load) &&
414             validate_body(stmt->v.For.body, "For") &&
415             validate_stmts(stmt->v.For.orelse);
416     case AsyncFor_kind:
417         return validate_expr(stmt->v.AsyncFor.target, Store) &&
418             validate_expr(stmt->v.AsyncFor.iter, Load) &&
419             validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
420             validate_stmts(stmt->v.AsyncFor.orelse);
421     case While_kind:
422         return validate_expr(stmt->v.While.test, Load) &&
423             validate_body(stmt->v.While.body, "While") &&
424             validate_stmts(stmt->v.While.orelse);
425     case If_kind:
426         return validate_expr(stmt->v.If.test, Load) &&
427             validate_body(stmt->v.If.body, "If") &&
428             validate_stmts(stmt->v.If.orelse);
429     case With_kind:
430         if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
431             return 0;
432         for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
433             withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
434             if (!validate_expr(item->context_expr, Load) ||
435                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
436                 return 0;
437         }
438         return validate_body(stmt->v.With.body, "With");
439     case AsyncWith_kind:
440         if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
441             return 0;
442         for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
443             withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
444             if (!validate_expr(item->context_expr, Load) ||
445                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
446                 return 0;
447         }
448         return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
449     case Raise_kind:
450         if (stmt->v.Raise.exc) {
451             return validate_expr(stmt->v.Raise.exc, Load) &&
452                 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
453         }
454         if (stmt->v.Raise.cause) {
455             PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
456             return 0;
457         }
458         return 1;
459     case Try_kind:
460         if (!validate_body(stmt->v.Try.body, "Try"))
461             return 0;
462         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
463             !asdl_seq_LEN(stmt->v.Try.finalbody)) {
464             PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
465             return 0;
466         }
467         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
468             asdl_seq_LEN(stmt->v.Try.orelse)) {
469             PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
470             return 0;
471         }
472         for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
473             excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
474             if ((handler->v.ExceptHandler.type &&
475                  !validate_expr(handler->v.ExceptHandler.type, Load)) ||
476                 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
477                 return 0;
478         }
479         return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
480                 validate_stmts(stmt->v.Try.finalbody)) &&
481             (!asdl_seq_LEN(stmt->v.Try.orelse) ||
482              validate_stmts(stmt->v.Try.orelse));
483     case Assert_kind:
484         return validate_expr(stmt->v.Assert.test, Load) &&
485             (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
486     case Import_kind:
487         return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
488     case ImportFrom_kind:
489         if (stmt->v.ImportFrom.level < 0) {
490             PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
491             return 0;
492         }
493         return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
494     case Global_kind:
495         return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
496     case Nonlocal_kind:
497         return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
498     case Expr_kind:
499         return validate_expr(stmt->v.Expr.value, Load);
500     case AsyncFunctionDef_kind:
501         return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
502             validate_arguments(stmt->v.AsyncFunctionDef.args) &&
503             validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
504             (!stmt->v.AsyncFunctionDef.returns ||
505              validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
506     case Pass_kind:
507     case Break_kind:
508     case Continue_kind:
509         return 1;
510     default:
511         PyErr_SetString(PyExc_SystemError, "unexpected statement");
512         return 0;
513     }
514 }
515 
516 static int
validate_stmts(asdl_seq * seq)517 validate_stmts(asdl_seq *seq)
518 {
519     Py_ssize_t i;
520     for (i = 0; i < asdl_seq_LEN(seq); i++) {
521         stmt_ty stmt = asdl_seq_GET(seq, i);
522         if (stmt) {
523             if (!validate_stmt(stmt))
524                 return 0;
525         }
526         else {
527             PyErr_SetString(PyExc_ValueError,
528                             "None disallowed in statement list");
529             return 0;
530         }
531     }
532     return 1;
533 }
534 
535 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)536 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
537 {
538     Py_ssize_t i;
539     for (i = 0; i < asdl_seq_LEN(exprs); i++) {
540         expr_ty expr = asdl_seq_GET(exprs, i);
541         if (expr) {
542             if (!validate_expr(expr, ctx))
543                 return 0;
544         }
545         else if (!null_ok) {
546             PyErr_SetString(PyExc_ValueError,
547                             "None disallowed in expression list");
548             return 0;
549         }
550 
551     }
552     return 1;
553 }
554 
555 int
PyAST_Validate(mod_ty mod)556 PyAST_Validate(mod_ty mod)
557 {
558     int res = 0;
559 
560     switch (mod->kind) {
561     case Module_kind:
562         res = validate_stmts(mod->v.Module.body);
563         break;
564     case Interactive_kind:
565         res = validate_stmts(mod->v.Interactive.body);
566         break;
567     case Expression_kind:
568         res = validate_expr(mod->v.Expression.body, Load);
569         break;
570     case Suite_kind:
571         PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
572         break;
573     default:
574         PyErr_SetString(PyExc_SystemError, "impossible module node");
575         res = 0;
576         break;
577     }
578     return res;
579 }
580 
581 /* This is done here, so defines like "test" don't interfere with AST use above. */
582 #include "grammar.h"
583 #include "parsetok.h"
584 #include "graminit.h"
585 
586 /* Data structure used internally */
587 struct compiling {
588     PyArena *c_arena; /* Arena for allocating memory. */
589     PyObject *c_filename; /* filename */
590     PyObject *c_normalize; /* Normalization function from unicodedata. */
591     int c_feature_version; /* Latest minor version of Python for allowed features */
592 };
593 
594 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
595 static expr_ty ast_for_expr(struct compiling *, const node *);
596 static stmt_ty ast_for_stmt(struct compiling *, const node *);
597 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599                                   expr_context_ty);
600 static expr_ty ast_for_testlist(struct compiling *, const node *);
601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602 
603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
605 
606 /* Note different signature for ast_for_call */
607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
608                             const node *, const node *, const node *);
609 
610 static PyObject *parsenumber(struct compiling *, const char *);
611 static expr_ty parsestrplus(struct compiling *, const node *n);
612 static void get_last_end_pos(asdl_seq *, int *, int *);
613 
614 #define COMP_GENEXP   0
615 #define COMP_LISTCOMP 1
616 #define COMP_SETCOMP  2
617 
618 static int
init_normalization(struct compiling * c)619 init_normalization(struct compiling *c)
620 {
621     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
622     if (!m)
623         return 0;
624     c->c_normalize = PyObject_GetAttrString(m, "normalize");
625     Py_DECREF(m);
626     if (!c->c_normalize)
627         return 0;
628     return 1;
629 }
630 
631 static identifier
new_identifier(const char * n,struct compiling * c)632 new_identifier(const char *n, struct compiling *c)
633 {
634     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
635     if (!id)
636         return NULL;
637     /* PyUnicode_DecodeUTF8 should always return a ready string. */
638     assert(PyUnicode_IS_READY(id));
639     /* Check whether there are non-ASCII characters in the
640        identifier; if so, normalize to NFKC. */
641     if (!PyUnicode_IS_ASCII(id)) {
642         PyObject *id2;
643         _Py_IDENTIFIER(NFKC);
644         if (!c->c_normalize && !init_normalization(c)) {
645             Py_DECREF(id);
646             return NULL;
647         }
648         PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
649         if (form == NULL) {
650             Py_DECREF(id);
651             return NULL;
652         }
653         PyObject *args[2] = {form, id};
654         id2 = _PyObject_FastCall(c->c_normalize, args, 2);
655         Py_DECREF(id);
656         if (!id2)
657             return NULL;
658         if (!PyUnicode_Check(id2)) {
659             PyErr_Format(PyExc_TypeError,
660                          "unicodedata.normalize() must return a string, not "
661                          "%.200s",
662                          Py_TYPE(id2)->tp_name);
663             Py_DECREF(id2);
664             return NULL;
665         }
666         id = id2;
667     }
668     PyUnicode_InternInPlace(&id);
669     if (PyArena_AddPyObject(c->c_arena, id) < 0) {
670         Py_DECREF(id);
671         return NULL;
672     }
673     return id;
674 }
675 
676 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
677 
678 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)679 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
680 {
681     PyObject *value, *errstr, *loc, *tmp;
682     va_list va;
683 
684     va_start(va, errmsg);
685     errstr = PyUnicode_FromFormatV(errmsg, va);
686     va_end(va);
687     if (!errstr) {
688         return 0;
689     }
690     loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
691     if (!loc) {
692         Py_INCREF(Py_None);
693         loc = Py_None;
694     }
695     tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
696     if (!tmp) {
697         Py_DECREF(errstr);
698         return 0;
699     }
700     value = PyTuple_Pack(2, errstr, tmp);
701     Py_DECREF(errstr);
702     Py_DECREF(tmp);
703     if (value) {
704         PyErr_SetObject(PyExc_SyntaxError, value);
705         Py_DECREF(value);
706     }
707     return 0;
708 }
709 
710 /* num_stmts() returns number of contained statements.
711 
712    Use this routine to determine how big a sequence is needed for
713    the statements in a parse tree.  Its raison d'etre is this bit of
714    grammar:
715 
716    stmt: simple_stmt | compound_stmt
717    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
718 
719    A simple_stmt can contain multiple small_stmt elements joined
720    by semicolons.  If the arg is a simple_stmt, the number of
721    small_stmt elements is returned.
722 */
723 
724 static string
new_type_comment(const char * s,struct compiling * c)725 new_type_comment(const char *s, struct compiling *c)
726 {
727     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
728     if (res == NULL)
729         return NULL;
730     if (PyArena_AddPyObject(c->c_arena, res) < 0) {
731         Py_DECREF(res);
732         return NULL;
733     }
734     return res;
735 }
736 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
737 
738 static int
num_stmts(const node * n)739 num_stmts(const node *n)
740 {
741     int i, l;
742     node *ch;
743 
744     switch (TYPE(n)) {
745         case single_input:
746             if (TYPE(CHILD(n, 0)) == NEWLINE)
747                 return 0;
748             else
749                 return num_stmts(CHILD(n, 0));
750         case file_input:
751             l = 0;
752             for (i = 0; i < NCH(n); i++) {
753                 ch = CHILD(n, i);
754                 if (TYPE(ch) == stmt)
755                     l += num_stmts(ch);
756             }
757             return l;
758         case stmt:
759             return num_stmts(CHILD(n, 0));
760         case compound_stmt:
761             return 1;
762         case simple_stmt:
763             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
764         case suite:
765         case func_body_suite:
766             /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
767             /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
768             if (NCH(n) == 1)
769                 return num_stmts(CHILD(n, 0));
770             else {
771                 i = 2;
772                 l = 0;
773                 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
774                     i += 2;
775                 for (; i < (NCH(n) - 1); i++)
776                     l += num_stmts(CHILD(n, i));
777                 return l;
778             }
779         default: {
780             char buf[128];
781 
782             sprintf(buf, "Non-statement found: %d %d",
783                     TYPE(n), NCH(n));
784             Py_FatalError(buf);
785         }
786     }
787     Py_UNREACHABLE();
788 }
789 
790 /* Transform the CST rooted at node * to the appropriate AST
791 */
792 
793 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)794 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
795                      PyObject *filename, PyArena *arena)
796 {
797     int i, j, k, num;
798     asdl_seq *stmts = NULL;
799     asdl_seq *type_ignores = NULL;
800     stmt_ty s;
801     node *ch;
802     struct compiling c;
803     mod_ty res = NULL;
804     asdl_seq *argtypes = NULL;
805     expr_ty ret, arg;
806 
807     c.c_arena = arena;
808     /* borrowed reference */
809     c.c_filename = filename;
810     c.c_normalize = NULL;
811     c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
812         flags->cf_feature_version : PY_MINOR_VERSION;
813 
814     if (TYPE(n) == encoding_decl)
815         n = CHILD(n, 0);
816 
817     k = 0;
818     switch (TYPE(n)) {
819         case file_input:
820             stmts = _Py_asdl_seq_new(num_stmts(n), arena);
821             if (!stmts)
822                 goto out;
823             for (i = 0; i < NCH(n) - 1; i++) {
824                 ch = CHILD(n, i);
825                 if (TYPE(ch) == NEWLINE)
826                     continue;
827                 REQ(ch, stmt);
828                 num = num_stmts(ch);
829                 if (num == 1) {
830                     s = ast_for_stmt(&c, ch);
831                     if (!s)
832                         goto out;
833                     asdl_seq_SET(stmts, k++, s);
834                 }
835                 else {
836                     ch = CHILD(ch, 0);
837                     REQ(ch, simple_stmt);
838                     for (j = 0; j < num; j++) {
839                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
840                         if (!s)
841                             goto out;
842                         asdl_seq_SET(stmts, k++, s);
843                     }
844                 }
845             }
846 
847             /* Type ignores are stored under the ENDMARKER in file_input. */
848             ch = CHILD(n, NCH(n) - 1);
849             REQ(ch, ENDMARKER);
850             num = NCH(ch);
851             type_ignores = _Py_asdl_seq_new(num, arena);
852             if (!type_ignores)
853                 goto out;
854 
855             for (i = 0; i < num; i++) {
856                 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
857                 if (!type_comment)
858                     goto out;
859                 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
860                 if (!ti)
861                    goto out;
862                asdl_seq_SET(type_ignores, i, ti);
863             }
864 
865             res = Module(stmts, type_ignores, arena);
866             break;
867         case eval_input: {
868             expr_ty testlist_ast;
869 
870             /* XXX Why not comp_for here? */
871             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
872             if (!testlist_ast)
873                 goto out;
874             res = Expression(testlist_ast, arena);
875             break;
876         }
877         case single_input:
878             if (TYPE(CHILD(n, 0)) == NEWLINE) {
879                 stmts = _Py_asdl_seq_new(1, arena);
880                 if (!stmts)
881                     goto out;
882                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
883                                             n->n_end_lineno, n->n_end_col_offset,
884                                             arena));
885                 if (!asdl_seq_GET(stmts, 0))
886                     goto out;
887                 res = Interactive(stmts, arena);
888             }
889             else {
890                 n = CHILD(n, 0);
891                 num = num_stmts(n);
892                 stmts = _Py_asdl_seq_new(num, arena);
893                 if (!stmts)
894                     goto out;
895                 if (num == 1) {
896                     s = ast_for_stmt(&c, n);
897                     if (!s)
898                         goto out;
899                     asdl_seq_SET(stmts, 0, s);
900                 }
901                 else {
902                     /* Only a simple_stmt can contain multiple statements. */
903                     REQ(n, simple_stmt);
904                     for (i = 0; i < NCH(n); i += 2) {
905                         if (TYPE(CHILD(n, i)) == NEWLINE)
906                             break;
907                         s = ast_for_stmt(&c, CHILD(n, i));
908                         if (!s)
909                             goto out;
910                         asdl_seq_SET(stmts, i / 2, s);
911                     }
912                 }
913 
914                 res = Interactive(stmts, arena);
915             }
916             break;
917         case func_type_input:
918             n = CHILD(n, 0);
919             REQ(n, func_type);
920 
921             if (TYPE(CHILD(n, 1)) == typelist) {
922                 ch = CHILD(n, 1);
923                 /* this is overly permissive -- we don't pay any attention to
924                  * stars on the args -- just parse them into an ordered list */
925                 num = 0;
926                 for (i = 0; i < NCH(ch); i++) {
927                     if (TYPE(CHILD(ch, i)) == test) {
928                         num++;
929                     }
930                 }
931 
932                 argtypes = _Py_asdl_seq_new(num, arena);
933                 if (!argtypes)
934                     goto out;
935 
936                 j = 0;
937                 for (i = 0; i < NCH(ch); i++) {
938                     if (TYPE(CHILD(ch, i)) == test) {
939                         arg = ast_for_expr(&c, CHILD(ch, i));
940                         if (!arg)
941                             goto out;
942                         asdl_seq_SET(argtypes, j++, arg);
943                     }
944                 }
945             }
946             else {
947                 argtypes = _Py_asdl_seq_new(0, arena);
948                 if (!argtypes)
949                     goto out;
950             }
951 
952             ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
953             if (!ret)
954                 goto out;
955             res = FunctionType(argtypes, ret, arena);
956             break;
957         default:
958             PyErr_Format(PyExc_SystemError,
959                          "invalid node %d for PyAST_FromNode", TYPE(n));
960             goto out;
961     }
962  out:
963     if (c.c_normalize) {
964         Py_DECREF(c.c_normalize);
965     }
966     return res;
967 }
968 
969 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)970 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
971                PyArena *arena)
972 {
973     mod_ty mod;
974     PyObject *filename;
975     filename = PyUnicode_DecodeFSDefault(filename_str);
976     if (filename == NULL)
977         return NULL;
978     mod = PyAST_FromNodeObject(n, flags, filename, arena);
979     Py_DECREF(filename);
980     return mod;
981 
982 }
983 
984 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
985 */
986 
987 static operator_ty
get_operator(struct compiling * c,const node * n)988 get_operator(struct compiling *c, const node *n)
989 {
990     switch (TYPE(n)) {
991         case VBAR:
992             return BitOr;
993         case CIRCUMFLEX:
994             return BitXor;
995         case AMPER:
996             return BitAnd;
997         case LEFTSHIFT:
998             return LShift;
999         case RIGHTSHIFT:
1000             return RShift;
1001         case PLUS:
1002             return Add;
1003         case MINUS:
1004             return Sub;
1005         case STAR:
1006             return Mult;
1007         case AT:
1008             if (c->c_feature_version < 5) {
1009                 ast_error(c, n,
1010                           "The '@' operator is only supported in Python 3.5 and greater");
1011                 return (operator_ty)0;
1012             }
1013             return MatMult;
1014         case SLASH:
1015             return Div;
1016         case DOUBLESLASH:
1017             return FloorDiv;
1018         case PERCENT:
1019             return Mod;
1020         default:
1021             return (operator_ty)0;
1022     }
1023 }
1024 
1025 static const char * const FORBIDDEN[] = {
1026     "None",
1027     "True",
1028     "False",
1029     "__debug__",
1030     NULL,
1031 };
1032 
1033 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1034 forbidden_name(struct compiling *c, identifier name, const node *n,
1035                int full_checks)
1036 {
1037     assert(PyUnicode_Check(name));
1038     const char * const *p = FORBIDDEN;
1039     if (!full_checks) {
1040         /* In most cases, the parser will protect True, False, and None
1041            from being assign to. */
1042         p += 3;
1043     }
1044     for (; *p; p++) {
1045         if (_PyUnicode_EqualToASCIIString(name, *p)) {
1046             ast_error(c, n, "cannot assign to %U", name);
1047             return 1;
1048         }
1049     }
1050     return 0;
1051 }
1052 
1053 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1054 copy_location(expr_ty e, const node *n, const node *end)
1055 {
1056     if (e) {
1057         e->lineno = LINENO(n);
1058         e->col_offset = n->n_col_offset;
1059         e->end_lineno = end->n_end_lineno;
1060         e->end_col_offset = end->n_end_col_offset;
1061     }
1062     return e;
1063 }
1064 
1065 static const char *
get_expr_name(expr_ty e)1066 get_expr_name(expr_ty e)
1067 {
1068     switch (e->kind) {
1069         case Attribute_kind:
1070             return "attribute";
1071         case Subscript_kind:
1072             return "subscript";
1073         case Starred_kind:
1074             return "starred";
1075         case Name_kind:
1076             return "name";
1077         case List_kind:
1078             return "list";
1079         case Tuple_kind:
1080             return "tuple";
1081         case Lambda_kind:
1082             return "lambda";
1083         case Call_kind:
1084             return "function call";
1085         case BoolOp_kind:
1086         case BinOp_kind:
1087         case UnaryOp_kind:
1088             return "operator";
1089         case GeneratorExp_kind:
1090             return "generator expression";
1091         case Yield_kind:
1092         case YieldFrom_kind:
1093             return "yield expression";
1094         case Await_kind:
1095             return "await expression";
1096         case ListComp_kind:
1097             return "list comprehension";
1098         case SetComp_kind:
1099             return "set comprehension";
1100         case DictComp_kind:
1101             return "dict comprehension";
1102         case Dict_kind:
1103             return "dict display";
1104         case Set_kind:
1105             return "set display";
1106         case JoinedStr_kind:
1107         case FormattedValue_kind:
1108             return "f-string expression";
1109         case Constant_kind: {
1110             PyObject *value = e->v.Constant.value;
1111             if (value == Py_None) {
1112                 return "None";
1113             }
1114             if (value == Py_False) {
1115                 return "False";
1116             }
1117             if (value == Py_True) {
1118                 return "True";
1119             }
1120             if (value == Py_Ellipsis) {
1121                 return "Ellipsis";
1122             }
1123             return "literal";
1124         }
1125         case Compare_kind:
1126             return "comparison";
1127         case IfExp_kind:
1128             return "conditional expression";
1129         case NamedExpr_kind:
1130             return "named expression";
1131         default:
1132             PyErr_Format(PyExc_SystemError,
1133                          "unexpected expression in assignment %d (line %d)",
1134                          e->kind, e->lineno);
1135             return NULL;
1136     }
1137 }
1138 
1139 /* Set the context ctx for expr_ty e, recursively traversing e.
1140 
1141    Only sets context for expr kinds that "can appear in assignment context"
1142    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
1143    an appropriate syntax error and returns false.
1144 */
1145 
1146 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1147 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1148 {
1149     asdl_seq *s = NULL;
1150 
1151     /* The ast defines augmented store and load contexts, but the
1152        implementation here doesn't actually use them.  The code may be
1153        a little more complex than necessary as a result.  It also means
1154        that expressions in an augmented assignment have a Store context.
1155        Consider restructuring so that augmented assignment uses
1156        set_context(), too.
1157     */
1158     assert(ctx != AugStore && ctx != AugLoad);
1159 
1160     switch (e->kind) {
1161         case Attribute_kind:
1162             e->v.Attribute.ctx = ctx;
1163             if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1164                 return 0;
1165             break;
1166         case Subscript_kind:
1167             e->v.Subscript.ctx = ctx;
1168             break;
1169         case Starred_kind:
1170             e->v.Starred.ctx = ctx;
1171             if (!set_context(c, e->v.Starred.value, ctx, n))
1172                 return 0;
1173             break;
1174         case Name_kind:
1175             if (ctx == Store) {
1176                 if (forbidden_name(c, e->v.Name.id, n, 0))
1177                     return 0; /* forbidden_name() calls ast_error() */
1178             }
1179             e->v.Name.ctx = ctx;
1180             break;
1181         case List_kind:
1182             e->v.List.ctx = ctx;
1183             s = e->v.List.elts;
1184             break;
1185         case Tuple_kind:
1186             e->v.Tuple.ctx = ctx;
1187             s = e->v.Tuple.elts;
1188             break;
1189         default: {
1190             const char *expr_name = get_expr_name(e);
1191             if (expr_name != NULL) {
1192                 ast_error(c, n, "cannot %s %s",
1193                           ctx == Store ? "assign to" : "delete",
1194                           expr_name);
1195             }
1196             return 0;
1197         }
1198     }
1199 
1200     /* If the LHS is a list or tuple, we need to set the assignment
1201        context for all the contained elements.
1202     */
1203     if (s) {
1204         Py_ssize_t i;
1205 
1206         for (i = 0; i < asdl_seq_LEN(s); i++) {
1207             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1208                 return 0;
1209         }
1210     }
1211     return 1;
1212 }
1213 
1214 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1215 ast_for_augassign(struct compiling *c, const node *n)
1216 {
1217     REQ(n, augassign);
1218     n = CHILD(n, 0);
1219     switch (STR(n)[0]) {
1220         case '+':
1221             return Add;
1222         case '-':
1223             return Sub;
1224         case '/':
1225             if (STR(n)[1] == '/')
1226                 return FloorDiv;
1227             else
1228                 return Div;
1229         case '%':
1230             return Mod;
1231         case '<':
1232             return LShift;
1233         case '>':
1234             return RShift;
1235         case '&':
1236             return BitAnd;
1237         case '^':
1238             return BitXor;
1239         case '|':
1240             return BitOr;
1241         case '*':
1242             if (STR(n)[1] == '*')
1243                 return Pow;
1244             else
1245                 return Mult;
1246         case '@':
1247             if (c->c_feature_version < 5) {
1248                 ast_error(c, n,
1249                           "The '@' operator is only supported in Python 3.5 and greater");
1250                 return (operator_ty)0;
1251             }
1252             return MatMult;
1253         default:
1254             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1255             return (operator_ty)0;
1256     }
1257 }
1258 
1259 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1260 ast_for_comp_op(struct compiling *c, const node *n)
1261 {
1262     /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1263                |'is' 'not'
1264     */
1265     REQ(n, comp_op);
1266     if (NCH(n) == 1) {
1267         n = CHILD(n, 0);
1268         switch (TYPE(n)) {
1269             case LESS:
1270                 return Lt;
1271             case GREATER:
1272                 return Gt;
1273             case EQEQUAL:                       /* == */
1274                 return Eq;
1275             case LESSEQUAL:
1276                 return LtE;
1277             case GREATEREQUAL:
1278                 return GtE;
1279             case NOTEQUAL:
1280                 return NotEq;
1281             case NAME:
1282                 if (strcmp(STR(n), "in") == 0)
1283                     return In;
1284                 if (strcmp(STR(n), "is") == 0)
1285                     return Is;
1286                 /* fall through */
1287             default:
1288                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1289                              STR(n));
1290                 return (cmpop_ty)0;
1291         }
1292     }
1293     else if (NCH(n) == 2) {
1294         /* handle "not in" and "is not" */
1295         switch (TYPE(CHILD(n, 0))) {
1296             case NAME:
1297                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1298                     return NotIn;
1299                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1300                     return IsNot;
1301                 /* fall through */
1302             default:
1303                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1304                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1305                 return (cmpop_ty)0;
1306         }
1307     }
1308     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1309                  NCH(n));
1310     return (cmpop_ty)0;
1311 }
1312 
1313 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1314 seq_for_testlist(struct compiling *c, const node *n)
1315 {
1316     /* testlist: test (',' test)* [',']
1317        testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1318     */
1319     asdl_seq *seq;
1320     expr_ty expression;
1321     int i;
1322     assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1323 
1324     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1325     if (!seq)
1326         return NULL;
1327 
1328     for (i = 0; i < NCH(n); i += 2) {
1329         const node *ch = CHILD(n, i);
1330         assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1331 
1332         expression = ast_for_expr(c, ch);
1333         if (!expression)
1334             return NULL;
1335 
1336         assert(i / 2 < seq->size);
1337         asdl_seq_SET(seq, i / 2, expression);
1338     }
1339     return seq;
1340 }
1341 
1342 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1343 ast_for_arg(struct compiling *c, const node *n)
1344 {
1345     identifier name;
1346     expr_ty annotation = NULL;
1347     node *ch;
1348     arg_ty ret;
1349 
1350     assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1351     ch = CHILD(n, 0);
1352     name = NEW_IDENTIFIER(ch);
1353     if (!name)
1354         return NULL;
1355     if (forbidden_name(c, name, ch, 0))
1356         return NULL;
1357 
1358     if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1359         annotation = ast_for_expr(c, CHILD(n, 2));
1360         if (!annotation)
1361             return NULL;
1362     }
1363 
1364     ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1365               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1366     if (!ret)
1367         return NULL;
1368     return ret;
1369 }
1370 
1371 /* returns -1 if failed to handle keyword only arguments
1372    returns new position to keep processing if successful
1373                (',' tfpdef ['=' test])*
1374                      ^^^
1375    start pointing here
1376  */
1377 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1378 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1379                         asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1380 {
1381     PyObject *argname;
1382     node *ch;
1383     expr_ty expression, annotation;
1384     arg_ty arg = NULL;
1385     int i = start;
1386     int j = 0; /* index for kwdefaults and kwonlyargs */
1387 
1388     if (kwonlyargs == NULL) {
1389         ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1390         return -1;
1391     }
1392     assert(kwdefaults != NULL);
1393     while (i < NCH(n)) {
1394         ch = CHILD(n, i);
1395         switch (TYPE(ch)) {
1396             case vfpdef:
1397             case tfpdef:
1398                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1399                     expression = ast_for_expr(c, CHILD(n, i + 2));
1400                     if (!expression)
1401                         goto error;
1402                     asdl_seq_SET(kwdefaults, j, expression);
1403                     i += 2; /* '=' and test */
1404                 }
1405                 else { /* setting NULL if no default value exists */
1406                     asdl_seq_SET(kwdefaults, j, NULL);
1407                 }
1408                 if (NCH(ch) == 3) {
1409                     /* ch is NAME ':' test */
1410                     annotation = ast_for_expr(c, CHILD(ch, 2));
1411                     if (!annotation)
1412                         goto error;
1413                 }
1414                 else {
1415                     annotation = NULL;
1416                 }
1417                 ch = CHILD(ch, 0);
1418                 argname = NEW_IDENTIFIER(ch);
1419                 if (!argname)
1420                     goto error;
1421                 if (forbidden_name(c, argname, ch, 0))
1422                     goto error;
1423                 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1424                           ch->n_end_lineno, ch->n_end_col_offset,
1425                           c->c_arena);
1426                 if (!arg)
1427                     goto error;
1428                 asdl_seq_SET(kwonlyargs, j++, arg);
1429                 i += 1; /* the name */
1430                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1431                     i += 1; /* the comma, if present */
1432                 break;
1433             case TYPE_COMMENT:
1434                 /* arg will be equal to the last argument processed */
1435                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1436                 if (!arg->type_comment)
1437                     goto error;
1438                 i += 1;
1439                 break;
1440             case DOUBLESTAR:
1441                 return i;
1442             default:
1443                 ast_error(c, ch, "unexpected node");
1444                 goto error;
1445         }
1446     }
1447     return i;
1448  error:
1449     return -1;
1450 }
1451 
1452 /* Create AST for argument list. */
1453 
1454 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1455 ast_for_arguments(struct compiling *c, const node *n)
1456 {
1457     /* This function handles both typedargslist (function definition)
1458        and varargslist (lambda definition).
1459 
1460        parameters: '(' [typedargslist] ')'
1461 
1462        The following definition for typedarglist is equivalent to this set of rules:
1463 
1464          arguments = argument (',' [TYPE_COMMENT] argument)*
1465          argument = tfpdef ['=' test]
1466          kwargs = '**' tfpdef [','] [TYPE_COMMENT]
1467          args = '*' [tfpdef]
1468          kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
1469                          [TYPE_COMMENT] [kwargs]])
1470          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1471          poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
1472                                          [TYPE_COMMENT] [args_kwonly_kwargs]])
1473          typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1474          typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
1475                         typedargslist_no_posonly]])|(typedargslist_no_posonly)"
1476 
1477        typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1478            ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
1479            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1480            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1481            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1482            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1483            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1484            '**' tfpdef [','] [TYPE_COMMENT]]] ) |  (tfpdef ['=' test] (','
1485            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1486            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1487            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1488            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1489            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1490            '**' tfpdef [','] [TYPE_COMMENT]))
1491 
1492        tfpdef: NAME [':' test]
1493 
1494        The following definition for varargslist is equivalent to this set of rules:
1495 
1496          arguments = argument (',' argument )*
1497          argument = vfpdef ['=' test]
1498          kwargs = '**' vfpdef [',']
1499          args = '*' [vfpdef]
1500          kwonly_kwargs = (',' argument )* [',' [kwargs]]
1501          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1502          poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
1503          vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1504          varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
1505                        (vararglist_no_posonly)
1506 
1507        varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
1508            test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
1509            ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
1510            [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
1511            ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1512            | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
1513            [',']]] | '**' vfpdef [','])
1514 
1515        vfpdef: NAME
1516 
1517     */
1518     int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
1519     int nposdefaults = 0, found_default = 0;
1520     asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1521     arg_ty vararg = NULL, kwarg = NULL;
1522     arg_ty arg = NULL;
1523     node *ch;
1524 
1525     if (TYPE(n) == parameters) {
1526         if (NCH(n) == 2) /* () as argument list */
1527             return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1528         n = CHILD(n, 1);
1529     }
1530     assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1531 
1532     /* First count the number of positional args & defaults.  The
1533        variable i is the loop index for this for loop and the next.
1534        The next loop picks up where the first leaves off.
1535     */
1536     for (i = 0; i < NCH(n); i++) {
1537         ch = CHILD(n, i);
1538         if (TYPE(ch) == STAR) {
1539             /* skip star */
1540             i++;
1541             if (i < NCH(n) && /* skip argument following star */
1542                 (TYPE(CHILD(n, i)) == tfpdef ||
1543                  TYPE(CHILD(n, i)) == vfpdef)) {
1544                 i++;
1545             }
1546             break;
1547         }
1548         if (TYPE(ch) == DOUBLESTAR) break;
1549         if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1550         if (TYPE(ch) == EQUAL) nposdefaults++;
1551         if (TYPE(ch) == SLASH ) {
1552             nposonlyargs = nposargs;
1553             nposargs = 0;
1554         }
1555     }
1556     /* count the number of keyword only args &
1557        defaults for keyword only args */
1558     for ( ; i < NCH(n); ++i) {
1559         ch = CHILD(n, i);
1560         if (TYPE(ch) == DOUBLESTAR) break;
1561         if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1562     }
1563     posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
1564     if (!posonlyargs && nposonlyargs) {
1565         return NULL;
1566     }
1567     posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1568     if (!posargs && nposargs)
1569         return NULL;
1570     kwonlyargs = (nkwonlyargs ?
1571                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1572     if (!kwonlyargs && nkwonlyargs)
1573         return NULL;
1574     posdefaults = (nposdefaults ?
1575                     _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1576     if (!posdefaults && nposdefaults)
1577         return NULL;
1578     /* The length of kwonlyargs and kwdefaults are same
1579        since we set NULL as default for keyword only argument w/o default
1580        - we have sequence data structure, but no dictionary */
1581     kwdefaults = (nkwonlyargs ?
1582                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1583     if (!kwdefaults && nkwonlyargs)
1584         return NULL;
1585 
1586     /* tfpdef: NAME [':' test]
1587        vfpdef: NAME
1588     */
1589     i = 0;
1590     j = 0;  /* index for defaults */
1591     k = 0;  /* index for args */
1592     l = 0;  /* index for posonlyargs */
1593     while (i < NCH(n)) {
1594         ch = CHILD(n, i);
1595         switch (TYPE(ch)) {
1596             case tfpdef:
1597             case vfpdef:
1598                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1599                    anything other than EQUAL or a comma? */
1600                 /* XXX Should NCH(n) check be made a separate check? */
1601                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1602                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1603                     if (!expression)
1604                         return NULL;
1605                     assert(posdefaults != NULL);
1606                     asdl_seq_SET(posdefaults, j++, expression);
1607                     i += 2;
1608                     found_default = 1;
1609                 }
1610                 else if (found_default) {
1611                     ast_error(c, n,
1612                               "non-default argument follows default argument");
1613                     return NULL;
1614                 }
1615                 arg = ast_for_arg(c, ch);
1616                 if (!arg)
1617                     return NULL;
1618                 if (l < nposonlyargs) {
1619                     asdl_seq_SET(posonlyargs, l++, arg);
1620                 } else {
1621                     asdl_seq_SET(posargs, k++, arg);
1622                 }
1623                 i += 1; /* the name */
1624                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1625                     i += 1; /* the comma, if present */
1626                 break;
1627              case SLASH:
1628                 /* Advance the slash and the comma. If there are more names
1629                  * after the slash there will be a comma so we are advancing
1630                  * the correct number of nodes. If the slash is the last item,
1631                  * we will be advancing an extra token but then * i > NCH(n)
1632                  * and the enclosing while will finish correctly. */
1633                 i += 2;
1634                 break;
1635             case STAR:
1636                 if (i+1 >= NCH(n) ||
1637                     (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
1638                                        || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
1639                     ast_error(c, CHILD(n, i),
1640                               "named arguments must follow bare *");
1641                     return NULL;
1642                 }
1643                 ch = CHILD(n, i+1);  /* tfpdef or COMMA */
1644                 if (TYPE(ch) == COMMA) {
1645                     int res = 0;
1646                     i += 2; /* now follows keyword only arguments */
1647 
1648                     if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1649                         ast_error(c, CHILD(n, i),
1650                                   "bare * has associated type comment");
1651                         return NULL;
1652                     }
1653 
1654                     res = handle_keywordonly_args(c, n, i,
1655                                                   kwonlyargs, kwdefaults);
1656                     if (res == -1) return NULL;
1657                     i = res; /* res has new position to process */
1658                 }
1659                 else {
1660                     vararg = ast_for_arg(c, ch);
1661                     if (!vararg)
1662                         return NULL;
1663 
1664                 i += 2; /* the star and the name */
1665                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1666                     i += 1; /* the comma, if present */
1667 
1668                 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1669                         vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
1670                         if (!vararg->type_comment)
1671                             return NULL;
1672                         i += 1;
1673                     }
1674 
1675                     if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1676                                     || TYPE(CHILD(n, i)) == vfpdef)) {
1677                         int res = 0;
1678                         res = handle_keywordonly_args(c, n, i,
1679                                                       kwonlyargs, kwdefaults);
1680                         if (res == -1) return NULL;
1681                         i = res; /* res has new position to process */
1682                     }
1683                 }
1684                 break;
1685             case DOUBLESTAR:
1686                 ch = CHILD(n, i+1);  /* tfpdef */
1687                 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1688                 kwarg = ast_for_arg(c, ch);
1689                 if (!kwarg)
1690                     return NULL;
1691                 i += 2; /* the double star and the name */
1692                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1693                     i += 1; /* the comma, if present */
1694                 break;
1695             case TYPE_COMMENT:
1696                 assert(i);
1697 
1698                 if (kwarg)
1699                     arg = kwarg;
1700 
1701                 /* arg will be equal to the last argument processed */
1702                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1703                 if (!arg->type_comment)
1704                     return NULL;
1705                 i += 1;
1706                 break;
1707             default:
1708                 PyErr_Format(PyExc_SystemError,
1709                              "unexpected node in varargslist: %d @ %d",
1710                              TYPE(ch), i);
1711                 return NULL;
1712         }
1713     }
1714     return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1715 }
1716 
1717 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1718 ast_for_dotted_name(struct compiling *c, const node *n)
1719 {
1720     expr_ty e;
1721     identifier id;
1722     int lineno, col_offset;
1723     int i;
1724     node *ch;
1725 
1726     REQ(n, dotted_name);
1727 
1728     lineno = LINENO(n);
1729     col_offset = n->n_col_offset;
1730 
1731     ch = CHILD(n, 0);
1732     id = NEW_IDENTIFIER(ch);
1733     if (!id)
1734         return NULL;
1735     e = Name(id, Load, lineno, col_offset,
1736              ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1737     if (!e)
1738         return NULL;
1739 
1740     for (i = 2; i < NCH(n); i+=2) {
1741         const node *child = CHILD(n, i);
1742         id = NEW_IDENTIFIER(child);
1743         if (!id)
1744             return NULL;
1745         e = Attribute(e, id, Load, lineno, col_offset,
1746                       child->n_end_lineno, child->n_end_col_offset, c->c_arena);
1747         if (!e)
1748             return NULL;
1749     }
1750 
1751     return e;
1752 }
1753 
1754 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1755 ast_for_decorator(struct compiling *c, const node *n)
1756 {
1757     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1758     expr_ty d = NULL;
1759     expr_ty name_expr;
1760 
1761     REQ(n, decorator);
1762     REQ(CHILD(n, 0), AT);
1763     REQ(RCHILD(n, -1), NEWLINE);
1764 
1765     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1766     if (!name_expr)
1767         return NULL;
1768 
1769     if (NCH(n) == 3) { /* No arguments */
1770         d = name_expr;
1771         name_expr = NULL;
1772     }
1773     else if (NCH(n) == 5) { /* Call with no arguments */
1774         d = Call(name_expr, NULL, NULL,
1775                  name_expr->lineno, name_expr->col_offset,
1776                  CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1777                  c->c_arena);
1778         if (!d)
1779             return NULL;
1780         name_expr = NULL;
1781     }
1782     else {
1783         d = ast_for_call(c, CHILD(n, 3), name_expr,
1784                          CHILD(n, 1), CHILD(n, 2), CHILD(n, 4));
1785         if (!d)
1786             return NULL;
1787         name_expr = NULL;
1788     }
1789 
1790     return d;
1791 }
1792 
1793 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1794 ast_for_decorators(struct compiling *c, const node *n)
1795 {
1796     asdl_seq* decorator_seq;
1797     expr_ty d;
1798     int i;
1799 
1800     REQ(n, decorators);
1801     decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1802     if (!decorator_seq)
1803         return NULL;
1804 
1805     for (i = 0; i < NCH(n); i++) {
1806         d = ast_for_decorator(c, CHILD(n, i));
1807         if (!d)
1808             return NULL;
1809         asdl_seq_SET(decorator_seq, i, d);
1810     }
1811     return decorator_seq;
1812 }
1813 
1814 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1815 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1816                      asdl_seq *decorator_seq, bool is_async)
1817 {
1818     /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1819     const node * const n = is_async ? CHILD(n0, 1) : n0;
1820     identifier name;
1821     arguments_ty args;
1822     asdl_seq *body;
1823     expr_ty returns = NULL;
1824     int name_i = 1;
1825     int end_lineno, end_col_offset;
1826     node *tc;
1827     string type_comment = NULL;
1828 
1829     if (is_async && c->c_feature_version < 5) {
1830         ast_error(c, n,
1831                   "Async functions are only supported in Python 3.5 and greater");
1832         return NULL;
1833     }
1834 
1835     REQ(n, funcdef);
1836 
1837     name = NEW_IDENTIFIER(CHILD(n, name_i));
1838     if (!name)
1839         return NULL;
1840     if (forbidden_name(c, name, CHILD(n, name_i), 0))
1841         return NULL;
1842     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1843     if (!args)
1844         return NULL;
1845     if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1846         returns = ast_for_expr(c, CHILD(n, name_i + 3));
1847         if (!returns)
1848             return NULL;
1849         name_i += 2;
1850     }
1851     if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1852         type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1853         if (!type_comment)
1854             return NULL;
1855         name_i += 1;
1856     }
1857     body = ast_for_suite(c, CHILD(n, name_i + 3));
1858     if (!body)
1859         return NULL;
1860     get_last_end_pos(body, &end_lineno, &end_col_offset);
1861 
1862     if (NCH(CHILD(n, name_i + 3)) > 1) {
1863         /* Check if the suite has a type comment in it. */
1864         tc = CHILD(CHILD(n, name_i + 3), 1);
1865 
1866         if (TYPE(tc) == TYPE_COMMENT) {
1867             if (type_comment != NULL) {
1868                 ast_error(c, n, "Cannot have two type comments on def");
1869                 return NULL;
1870             }
1871             type_comment = NEW_TYPE_COMMENT(tc);
1872             if (!type_comment)
1873                 return NULL;
1874         }
1875     }
1876 
1877     if (is_async)
1878         return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1879                                 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1880     else
1881         return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1882                            LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1883 }
1884 
1885 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1886 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1887 {
1888     /* async_funcdef: ASYNC funcdef */
1889     REQ(n, async_funcdef);
1890     REQ(CHILD(n, 0), ASYNC);
1891     REQ(CHILD(n, 1), funcdef);
1892 
1893     return ast_for_funcdef_impl(c, n, decorator_seq,
1894                                 true /* is_async */);
1895 }
1896 
1897 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1898 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1899 {
1900     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1901     return ast_for_funcdef_impl(c, n, decorator_seq,
1902                                 false /* is_async */);
1903 }
1904 
1905 
1906 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1907 ast_for_async_stmt(struct compiling *c, const node *n)
1908 {
1909     /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1910     REQ(n, async_stmt);
1911     REQ(CHILD(n, 0), ASYNC);
1912 
1913     switch (TYPE(CHILD(n, 1))) {
1914         case funcdef:
1915             return ast_for_funcdef_impl(c, n, NULL,
1916                                         true /* is_async */);
1917         case with_stmt:
1918             return ast_for_with_stmt(c, n,
1919                                      true /* is_async */);
1920 
1921         case for_stmt:
1922             return ast_for_for_stmt(c, n,
1923                                     true /* is_async */);
1924 
1925         default:
1926             PyErr_Format(PyExc_SystemError,
1927                          "invalid async stament: %s",
1928                          STR(CHILD(n, 1)));
1929             return NULL;
1930     }
1931 }
1932 
1933 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1934 ast_for_decorated(struct compiling *c, const node *n)
1935 {
1936     /* decorated: decorators (classdef | funcdef | async_funcdef) */
1937     stmt_ty thing = NULL;
1938     asdl_seq *decorator_seq = NULL;
1939 
1940     REQ(n, decorated);
1941 
1942     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1943     if (!decorator_seq)
1944       return NULL;
1945 
1946     assert(TYPE(CHILD(n, 1)) == funcdef ||
1947            TYPE(CHILD(n, 1)) == async_funcdef ||
1948            TYPE(CHILD(n, 1)) == classdef);
1949 
1950     if (TYPE(CHILD(n, 1)) == funcdef) {
1951       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1952     } else if (TYPE(CHILD(n, 1)) == classdef) {
1953       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1954     } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1955       thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1956     }
1957     return thing;
1958 }
1959 
1960 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1961 ast_for_namedexpr(struct compiling *c, const node *n)
1962 {
1963     /* namedexpr_test: test [':=' test]
1964        argument: ( test [comp_for] |
1965             test ':=' test |
1966             test '=' test |
1967             '**' test |
1968             '*' test )
1969     */
1970     expr_ty target, value;
1971 
1972     target = ast_for_expr(c, CHILD(n, 0));
1973     if (!target)
1974         return NULL;
1975 
1976     value = ast_for_expr(c, CHILD(n, 2));
1977     if (!value)
1978         return NULL;
1979 
1980     if (target->kind != Name_kind) {
1981         const char *expr_name = get_expr_name(target);
1982         if (expr_name != NULL) {
1983             ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1984         }
1985         return NULL;
1986     }
1987 
1988     if (!set_context(c, target, Store, n))
1989         return NULL;
1990 
1991     return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1992                      n->n_end_col_offset, c->c_arena);
1993 }
1994 
1995 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1996 ast_for_lambdef(struct compiling *c, const node *n)
1997 {
1998     /* lambdef: 'lambda' [varargslist] ':' test
1999        lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
2000     arguments_ty args;
2001     expr_ty expression;
2002 
2003     if (NCH(n) == 3) {
2004         args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
2005         if (!args)
2006             return NULL;
2007         expression = ast_for_expr(c, CHILD(n, 2));
2008         if (!expression)
2009             return NULL;
2010     }
2011     else {
2012         args = ast_for_arguments(c, CHILD(n, 1));
2013         if (!args)
2014             return NULL;
2015         expression = ast_for_expr(c, CHILD(n, 3));
2016         if (!expression)
2017             return NULL;
2018     }
2019 
2020     return Lambda(args, expression, LINENO(n), n->n_col_offset,
2021                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2022 }
2023 
2024 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2025 ast_for_ifexpr(struct compiling *c, const node *n)
2026 {
2027     /* test: or_test 'if' or_test 'else' test */
2028     expr_ty expression, body, orelse;
2029 
2030     assert(NCH(n) == 5);
2031     body = ast_for_expr(c, CHILD(n, 0));
2032     if (!body)
2033         return NULL;
2034     expression = ast_for_expr(c, CHILD(n, 2));
2035     if (!expression)
2036         return NULL;
2037     orelse = ast_for_expr(c, CHILD(n, 4));
2038     if (!orelse)
2039         return NULL;
2040     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2041                  n->n_end_lineno, n->n_end_col_offset,
2042                  c->c_arena);
2043 }
2044 
2045 /*
2046    Count the number of 'for' loops in a comprehension.
2047 
2048    Helper for ast_for_comprehension().
2049 */
2050 
2051 static int
count_comp_fors(struct compiling * c,const node * n)2052 count_comp_fors(struct compiling *c, const node *n)
2053 {
2054     int n_fors = 0;
2055 
2056   count_comp_for:
2057     n_fors++;
2058     REQ(n, comp_for);
2059     if (NCH(n) == 2) {
2060         REQ(CHILD(n, 0), ASYNC);
2061         n = CHILD(n, 1);
2062     }
2063     else if (NCH(n) == 1) {
2064         n = CHILD(n, 0);
2065     }
2066     else {
2067         goto error;
2068     }
2069     if (NCH(n) == (5)) {
2070         n = CHILD(n, 4);
2071     }
2072     else {
2073         return n_fors;
2074     }
2075   count_comp_iter:
2076     REQ(n, comp_iter);
2077     n = CHILD(n, 0);
2078     if (TYPE(n) == comp_for)
2079         goto count_comp_for;
2080     else if (TYPE(n) == comp_if) {
2081         if (NCH(n) == 3) {
2082             n = CHILD(n, 2);
2083             goto count_comp_iter;
2084         }
2085         else
2086             return n_fors;
2087     }
2088 
2089   error:
2090     /* Should never be reached */
2091     PyErr_SetString(PyExc_SystemError,
2092                     "logic error in count_comp_fors");
2093     return -1;
2094 }
2095 
2096 /* Count the number of 'if' statements in a comprehension.
2097 
2098    Helper for ast_for_comprehension().
2099 */
2100 
2101 static int
count_comp_ifs(struct compiling * c,const node * n)2102 count_comp_ifs(struct compiling *c, const node *n)
2103 {
2104     int n_ifs = 0;
2105 
2106     while (1) {
2107         REQ(n, comp_iter);
2108         if (TYPE(CHILD(n, 0)) == comp_for)
2109             return n_ifs;
2110         n = CHILD(n, 0);
2111         REQ(n, comp_if);
2112         n_ifs++;
2113         if (NCH(n) == 2)
2114             return n_ifs;
2115         n = CHILD(n, 2);
2116     }
2117 }
2118 
2119 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2120 ast_for_comprehension(struct compiling *c, const node *n)
2121 {
2122     int i, n_fors;
2123     asdl_seq *comps;
2124 
2125     n_fors = count_comp_fors(c, n);
2126     if (n_fors == -1)
2127         return NULL;
2128 
2129     comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2130     if (!comps)
2131         return NULL;
2132 
2133     for (i = 0; i < n_fors; i++) {
2134         comprehension_ty comp;
2135         asdl_seq *t;
2136         expr_ty expression, first;
2137         node *for_ch;
2138         node *sync_n;
2139         int is_async = 0;
2140 
2141         REQ(n, comp_for);
2142 
2143         if (NCH(n) == 2) {
2144             is_async = 1;
2145             REQ(CHILD(n, 0), ASYNC);
2146             sync_n = CHILD(n, 1);
2147         }
2148         else {
2149             sync_n = CHILD(n, 0);
2150         }
2151         REQ(sync_n, sync_comp_for);
2152 
2153         /* Async comprehensions only allowed in Python 3.6 and greater */
2154         if (is_async && c->c_feature_version < 6) {
2155             ast_error(c, n,
2156                       "Async comprehensions are only supported in Python 3.6 and greater");
2157             return NULL;
2158         }
2159 
2160         for_ch = CHILD(sync_n, 1);
2161         t = ast_for_exprlist(c, for_ch, Store);
2162         if (!t)
2163             return NULL;
2164         expression = ast_for_expr(c, CHILD(sync_n, 3));
2165         if (!expression)
2166             return NULL;
2167 
2168         /* Check the # of children rather than the length of t, since
2169            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2170         first = (expr_ty)asdl_seq_GET(t, 0);
2171         if (NCH(for_ch) == 1)
2172             comp = comprehension(first, expression, NULL,
2173                                  is_async, c->c_arena);
2174         else
2175             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2176                                        for_ch->n_end_lineno, for_ch->n_end_col_offset,
2177                                        c->c_arena),
2178                                  expression, NULL, is_async, c->c_arena);
2179         if (!comp)
2180             return NULL;
2181 
2182         if (NCH(sync_n) == 5) {
2183             int j, n_ifs;
2184             asdl_seq *ifs;
2185 
2186             n = CHILD(sync_n, 4);
2187             n_ifs = count_comp_ifs(c, n);
2188             if (n_ifs == -1)
2189                 return NULL;
2190 
2191             ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2192             if (!ifs)
2193                 return NULL;
2194 
2195             for (j = 0; j < n_ifs; j++) {
2196                 REQ(n, comp_iter);
2197                 n = CHILD(n, 0);
2198                 REQ(n, comp_if);
2199 
2200                 expression = ast_for_expr(c, CHILD(n, 1));
2201                 if (!expression)
2202                     return NULL;
2203                 asdl_seq_SET(ifs, j, expression);
2204                 if (NCH(n) == 3)
2205                     n = CHILD(n, 2);
2206             }
2207             /* on exit, must guarantee that n is a comp_for */
2208             if (TYPE(n) == comp_iter)
2209                 n = CHILD(n, 0);
2210             comp->ifs = ifs;
2211         }
2212         asdl_seq_SET(comps, i, comp);
2213     }
2214     return comps;
2215 }
2216 
2217 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2218 ast_for_itercomp(struct compiling *c, const node *n, int type)
2219 {
2220     /* testlist_comp: (test|star_expr)
2221      *                ( comp_for | (',' (test|star_expr))* [','] ) */
2222     expr_ty elt;
2223     asdl_seq *comps;
2224     node *ch;
2225 
2226     assert(NCH(n) > 1);
2227 
2228     ch = CHILD(n, 0);
2229     elt = ast_for_expr(c, ch);
2230     if (!elt)
2231         return NULL;
2232     if (elt->kind == Starred_kind) {
2233         ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2234         return NULL;
2235     }
2236 
2237     comps = ast_for_comprehension(c, CHILD(n, 1));
2238     if (!comps)
2239         return NULL;
2240 
2241     if (type == COMP_GENEXP)
2242         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2243                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2244     else if (type == COMP_LISTCOMP)
2245         return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2246                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2247     else if (type == COMP_SETCOMP)
2248         return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2249                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2250     else
2251         /* Should never happen */
2252         return NULL;
2253 }
2254 
2255 /* Fills in the key, value pair corresponding to the dict element.  In case
2256  * of an unpacking, key is NULL.  *i is advanced by the number of ast
2257  * elements.  Iff successful, nonzero is returned.
2258  */
2259 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2260 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2261                     expr_ty *key, expr_ty *value)
2262 {
2263     expr_ty expression;
2264     if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2265         assert(NCH(n) - *i >= 2);
2266 
2267         expression = ast_for_expr(c, CHILD(n, *i + 1));
2268         if (!expression)
2269             return 0;
2270         *key = NULL;
2271         *value = expression;
2272 
2273         *i += 2;
2274     }
2275     else {
2276         assert(NCH(n) - *i >= 3);
2277 
2278         expression = ast_for_expr(c, CHILD(n, *i));
2279         if (!expression)
2280             return 0;
2281         *key = expression;
2282 
2283         REQ(CHILD(n, *i + 1), COLON);
2284 
2285         expression = ast_for_expr(c, CHILD(n, *i + 2));
2286         if (!expression)
2287             return 0;
2288         *value = expression;
2289 
2290         *i += 3;
2291     }
2292     return 1;
2293 }
2294 
2295 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2296 ast_for_dictcomp(struct compiling *c, const node *n)
2297 {
2298     expr_ty key, value;
2299     asdl_seq *comps;
2300     int i = 0;
2301 
2302     if (!ast_for_dictelement(c, n, &i, &key, &value))
2303         return NULL;
2304     assert(key);
2305     assert(NCH(n) - i >= 1);
2306 
2307     comps = ast_for_comprehension(c, CHILD(n, i));
2308     if (!comps)
2309         return NULL;
2310 
2311     return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2312                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2313 }
2314 
2315 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2316 ast_for_dictdisplay(struct compiling *c, const node *n)
2317 {
2318     int i;
2319     int j;
2320     int size;
2321     asdl_seq *keys, *values;
2322 
2323     size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2324     keys = _Py_asdl_seq_new(size, c->c_arena);
2325     if (!keys)
2326         return NULL;
2327 
2328     values = _Py_asdl_seq_new(size, c->c_arena);
2329     if (!values)
2330         return NULL;
2331 
2332     j = 0;
2333     for (i = 0; i < NCH(n); i++) {
2334         expr_ty key, value;
2335 
2336         if (!ast_for_dictelement(c, n, &i, &key, &value))
2337             return NULL;
2338         asdl_seq_SET(keys, j, key);
2339         asdl_seq_SET(values, j, value);
2340 
2341         j++;
2342     }
2343     keys->size = j;
2344     values->size = j;
2345     return Dict(keys, values, LINENO(n), n->n_col_offset,
2346                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2347 }
2348 
2349 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2350 ast_for_genexp(struct compiling *c, const node *n)
2351 {
2352     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2353     return ast_for_itercomp(c, n, COMP_GENEXP);
2354 }
2355 
2356 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2357 ast_for_listcomp(struct compiling *c, const node *n)
2358 {
2359     assert(TYPE(n) == (testlist_comp));
2360     return ast_for_itercomp(c, n, COMP_LISTCOMP);
2361 }
2362 
2363 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2364 ast_for_setcomp(struct compiling *c, const node *n)
2365 {
2366     assert(TYPE(n) == (dictorsetmaker));
2367     return ast_for_itercomp(c, n, COMP_SETCOMP);
2368 }
2369 
2370 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2371 ast_for_setdisplay(struct compiling *c, const node *n)
2372 {
2373     int i;
2374     int size;
2375     asdl_seq *elts;
2376 
2377     assert(TYPE(n) == (dictorsetmaker));
2378     size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2379     elts = _Py_asdl_seq_new(size, c->c_arena);
2380     if (!elts)
2381         return NULL;
2382     for (i = 0; i < NCH(n); i += 2) {
2383         expr_ty expression;
2384         expression = ast_for_expr(c, CHILD(n, i));
2385         if (!expression)
2386             return NULL;
2387         asdl_seq_SET(elts, i / 2, expression);
2388     }
2389     return Set(elts, LINENO(n), n->n_col_offset,
2390                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2391 }
2392 
2393 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2394 ast_for_atom(struct compiling *c, const node *n)
2395 {
2396     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2397        | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2398        | '...' | 'None' | 'True' | 'False'
2399     */
2400     node *ch = CHILD(n, 0);
2401 
2402     switch (TYPE(ch)) {
2403     case NAME: {
2404         PyObject *name;
2405         const char *s = STR(ch);
2406         size_t len = strlen(s);
2407         if (len >= 4 && len <= 5) {
2408             if (!strcmp(s, "None"))
2409                 return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
2410                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2411             if (!strcmp(s, "True"))
2412                 return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
2413                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2414             if (!strcmp(s, "False"))
2415                 return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
2416                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2417         }
2418         name = new_identifier(s, c);
2419         if (!name)
2420             return NULL;
2421         /* All names start in Load context, but may later be changed. */
2422         return Name(name, Load, LINENO(n), n->n_col_offset,
2423                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2424     }
2425     case STRING: {
2426         expr_ty str = parsestrplus(c, n);
2427         if (!str) {
2428             const char *errtype = NULL;
2429             if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2430                 errtype = "unicode error";
2431             else if (PyErr_ExceptionMatches(PyExc_ValueError))
2432                 errtype = "value error";
2433             if (errtype) {
2434                 PyObject *type, *value, *tback, *errstr;
2435                 PyErr_Fetch(&type, &value, &tback);
2436                 errstr = PyObject_Str(value);
2437                 if (errstr) {
2438                     ast_error(c, n, "(%s) %U", errtype, errstr);
2439                     Py_DECREF(errstr);
2440                 }
2441                 else {
2442                     PyErr_Clear();
2443                     ast_error(c, n, "(%s) unknown error", errtype);
2444                 }
2445                 Py_DECREF(type);
2446                 Py_XDECREF(value);
2447                 Py_XDECREF(tback);
2448             }
2449             return NULL;
2450         }
2451         return str;
2452     }
2453     case NUMBER: {
2454         PyObject *pynum;
2455         /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
2456         /* Check for underscores here rather than in parse_number so we can report a line number on error */
2457         if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
2458             ast_error(c, ch,
2459                       "Underscores in numeric literals are only supported in Python 3.6 and greater");
2460             return NULL;
2461         }
2462         pynum = parsenumber(c, STR(ch));
2463         if (!pynum)
2464             return NULL;
2465 
2466         if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2467             Py_DECREF(pynum);
2468             return NULL;
2469         }
2470         return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
2471                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2472     }
2473     case ELLIPSIS: /* Ellipsis */
2474         return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
2475                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2476     case LPAR: /* some parenthesized expressions */
2477         ch = CHILD(n, 1);
2478 
2479         if (TYPE(ch) == RPAR)
2480             return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
2481                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2482 
2483         if (TYPE(ch) == yield_expr)
2484             return ast_for_expr(c, ch);
2485 
2486         /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2487         if (NCH(ch) == 1) {
2488             return ast_for_testlist(c, ch);
2489         }
2490 
2491         if (TYPE(CHILD(ch, 1)) == comp_for) {
2492             return copy_location(ast_for_genexp(c, ch), n, n);
2493         }
2494         else {
2495             return copy_location(ast_for_testlist(c, ch), n, n);
2496         }
2497     case LSQB: /* list (or list comprehension) */
2498         ch = CHILD(n, 1);
2499 
2500         if (TYPE(ch) == RSQB)
2501             return List(NULL, Load, LINENO(n), n->n_col_offset,
2502                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2503 
2504         REQ(ch, testlist_comp);
2505         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2506             asdl_seq *elts = seq_for_testlist(c, ch);
2507             if (!elts)
2508                 return NULL;
2509 
2510             return List(elts, Load, LINENO(n), n->n_col_offset,
2511                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2512         }
2513         else {
2514             return copy_location(ast_for_listcomp(c, ch), n, n);
2515         }
2516     case LBRACE: {
2517         /* dictorsetmaker: ( ((test ':' test | '**' test)
2518          *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
2519          *                   ((test | '*' test)
2520          *                    (comp_for | (',' (test | '*' test))* [','])) ) */
2521         expr_ty res;
2522         ch = CHILD(n, 1);
2523         if (TYPE(ch) == RBRACE) {
2524             /* It's an empty dict. */
2525             return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
2526                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2527         }
2528         else {
2529             int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2530             if (NCH(ch) == 1 ||
2531                     (NCH(ch) > 1 &&
2532                      TYPE(CHILD(ch, 1)) == COMMA)) {
2533                 /* It's a set display. */
2534                 res = ast_for_setdisplay(c, ch);
2535             }
2536             else if (NCH(ch) > 1 &&
2537                     TYPE(CHILD(ch, 1)) == comp_for) {
2538                 /* It's a set comprehension. */
2539                 res = ast_for_setcomp(c, ch);
2540             }
2541             else if (NCH(ch) > 3 - is_dict &&
2542                     TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2543                 /* It's a dictionary comprehension. */
2544                 if (is_dict) {
2545                     ast_error(c, n,
2546                               "dict unpacking cannot be used in dict comprehension");
2547                     return NULL;
2548                 }
2549                 res = ast_for_dictcomp(c, ch);
2550             }
2551             else {
2552                 /* It's a dictionary display. */
2553                 res = ast_for_dictdisplay(c, ch);
2554             }
2555             return copy_location(res, n, n);
2556         }
2557     }
2558     default:
2559         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2560         return NULL;
2561     }
2562 }
2563 
2564 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2565 ast_for_slice(struct compiling *c, const node *n)
2566 {
2567     node *ch;
2568     expr_ty lower = NULL, upper = NULL, step = NULL;
2569 
2570     REQ(n, subscript);
2571 
2572     /*
2573        subscript: test | [test] ':' [test] [sliceop]
2574        sliceop: ':' [test]
2575     */
2576     ch = CHILD(n, 0);
2577     if (NCH(n) == 1 && TYPE(ch) == test) {
2578         /* 'step' variable hold no significance in terms of being used over
2579            other vars */
2580         step = ast_for_expr(c, ch);
2581         if (!step)
2582             return NULL;
2583 
2584         return Index(step, c->c_arena);
2585     }
2586 
2587     if (TYPE(ch) == test) {
2588         lower = ast_for_expr(c, ch);
2589         if (!lower)
2590             return NULL;
2591     }
2592 
2593     /* If there's an upper bound it's in the second or third position. */
2594     if (TYPE(ch) == COLON) {
2595         if (NCH(n) > 1) {
2596             node *n2 = CHILD(n, 1);
2597 
2598             if (TYPE(n2) == test) {
2599                 upper = ast_for_expr(c, n2);
2600                 if (!upper)
2601                     return NULL;
2602             }
2603         }
2604     } else if (NCH(n) > 2) {
2605         node *n2 = CHILD(n, 2);
2606 
2607         if (TYPE(n2) == test) {
2608             upper = ast_for_expr(c, n2);
2609             if (!upper)
2610                 return NULL;
2611         }
2612     }
2613 
2614     ch = CHILD(n, NCH(n) - 1);
2615     if (TYPE(ch) == sliceop) {
2616         if (NCH(ch) != 1) {
2617             ch = CHILD(ch, 1);
2618             if (TYPE(ch) == test) {
2619                 step = ast_for_expr(c, ch);
2620                 if (!step)
2621                     return NULL;
2622             }
2623         }
2624     }
2625 
2626     return Slice(lower, upper, step, c->c_arena);
2627 }
2628 
2629 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2630 ast_for_binop(struct compiling *c, const node *n)
2631 {
2632     /* Must account for a sequence of expressions.
2633        How should A op B op C by represented?
2634        BinOp(BinOp(A, op, B), op, C).
2635     */
2636 
2637     int i, nops;
2638     expr_ty expr1, expr2, result;
2639     operator_ty newoperator;
2640 
2641     expr1 = ast_for_expr(c, CHILD(n, 0));
2642     if (!expr1)
2643         return NULL;
2644 
2645     expr2 = ast_for_expr(c, CHILD(n, 2));
2646     if (!expr2)
2647         return NULL;
2648 
2649     newoperator = get_operator(c, CHILD(n, 1));
2650     if (!newoperator)
2651         return NULL;
2652 
2653     result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2654                    CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2655                    c->c_arena);
2656     if (!result)
2657         return NULL;
2658 
2659     nops = (NCH(n) - 1) / 2;
2660     for (i = 1; i < nops; i++) {
2661         expr_ty tmp_result, tmp;
2662         const node* next_oper = CHILD(n, i * 2 + 1);
2663 
2664         newoperator = get_operator(c, next_oper);
2665         if (!newoperator)
2666             return NULL;
2667 
2668         tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2669         if (!tmp)
2670             return NULL;
2671 
2672         tmp_result = BinOp(result, newoperator, tmp,
2673                            LINENO(n), n->n_col_offset,
2674                            CHILD(n, i * 2 + 2)->n_end_lineno,
2675                            CHILD(n, i * 2 + 2)->n_end_col_offset,
2676                            c->c_arena);
2677         if (!tmp_result)
2678             return NULL;
2679         result = tmp_result;
2680     }
2681     return result;
2682 }
2683 
2684 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2685 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2686 {
2687     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2688        subscriptlist: subscript (',' subscript)* [',']
2689        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2690      */
2691     const node *n_copy = n;
2692     REQ(n, trailer);
2693     if (TYPE(CHILD(n, 0)) == LPAR) {
2694         if (NCH(n) == 2)
2695             return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2696                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2697         else
2698             return ast_for_call(c, CHILD(n, 1), left_expr,
2699                                 start, CHILD(n, 0), CHILD(n, 2));
2700     }
2701     else if (TYPE(CHILD(n, 0)) == DOT) {
2702         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2703         if (!attr_id)
2704             return NULL;
2705         return Attribute(left_expr, attr_id, Load,
2706                          LINENO(start), start->n_col_offset,
2707                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2708     }
2709     else {
2710         REQ(CHILD(n, 0), LSQB);
2711         REQ(CHILD(n, 2), RSQB);
2712         n = CHILD(n, 1);
2713         if (NCH(n) == 1) {
2714             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2715             if (!slc)
2716                 return NULL;
2717             return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2718                              n_copy->n_end_lineno, n_copy->n_end_col_offset,
2719                              c->c_arena);
2720         }
2721         else {
2722             /* The grammar is ambiguous here. The ambiguity is resolved
2723                by treating the sequence as a tuple literal if there are
2724                no slice features.
2725             */
2726             Py_ssize_t j;
2727             slice_ty slc;
2728             expr_ty e;
2729             int simple = 1;
2730             asdl_seq *slices, *elts;
2731             slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2732             if (!slices)
2733                 return NULL;
2734             for (j = 0; j < NCH(n); j += 2) {
2735                 slc = ast_for_slice(c, CHILD(n, j));
2736                 if (!slc)
2737                     return NULL;
2738                 if (slc->kind != Index_kind)
2739                     simple = 0;
2740                 asdl_seq_SET(slices, j / 2, slc);
2741             }
2742             if (!simple) {
2743                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2744                                  Load, LINENO(start), start->n_col_offset,
2745                                  n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2746             }
2747             /* extract Index values and put them in a Tuple */
2748             elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2749             if (!elts)
2750                 return NULL;
2751             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2752                 slc = (slice_ty)asdl_seq_GET(slices, j);
2753                 assert(slc->kind == Index_kind  && slc->v.Index.value);
2754                 asdl_seq_SET(elts, j, slc->v.Index.value);
2755             }
2756             e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2757                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2758             if (!e)
2759                 return NULL;
2760             return Subscript(left_expr, Index(e, c->c_arena),
2761                              Load, LINENO(start), start->n_col_offset,
2762                              n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2763         }
2764     }
2765 }
2766 
2767 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2768 ast_for_factor(struct compiling *c, const node *n)
2769 {
2770     expr_ty expression;
2771 
2772     expression = ast_for_expr(c, CHILD(n, 1));
2773     if (!expression)
2774         return NULL;
2775 
2776     switch (TYPE(CHILD(n, 0))) {
2777         case PLUS:
2778             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2779                            n->n_end_lineno, n->n_end_col_offset,
2780                            c->c_arena);
2781         case MINUS:
2782             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2783                            n->n_end_lineno, n->n_end_col_offset,
2784                            c->c_arena);
2785         case TILDE:
2786             return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2787                            n->n_end_lineno, n->n_end_col_offset,
2788                            c->c_arena);
2789     }
2790     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2791                  TYPE(CHILD(n, 0)));
2792     return NULL;
2793 }
2794 
2795 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2796 ast_for_atom_expr(struct compiling *c, const node *n)
2797 {
2798     int i, nch, start = 0;
2799     expr_ty e;
2800 
2801     REQ(n, atom_expr);
2802     nch = NCH(n);
2803 
2804     if (TYPE(CHILD(n, 0)) == AWAIT) {
2805         if (c->c_feature_version < 5) {
2806             ast_error(c, n,
2807                       "Await expressions are only supported in Python 3.5 and greater");
2808             return NULL;
2809         }
2810         start = 1;
2811         assert(nch > 1);
2812     }
2813 
2814     e = ast_for_atom(c, CHILD(n, start));
2815     if (!e)
2816         return NULL;
2817     if (nch == 1)
2818         return e;
2819     if (start && nch == 2) {
2820         return Await(e, LINENO(n), n->n_col_offset,
2821                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2822     }
2823 
2824     for (i = start + 1; i < nch; i++) {
2825         node *ch = CHILD(n, i);
2826         if (TYPE(ch) != trailer)
2827             break;
2828         e = ast_for_trailer(c, ch, e, CHILD(n, start));
2829         if (!e)
2830             return NULL;
2831     }
2832 
2833     if (start) {
2834         /* there was an 'await' */
2835         return Await(e, LINENO(n), n->n_col_offset,
2836                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2837     }
2838     else {
2839         return e;
2840     }
2841 }
2842 
2843 static expr_ty
ast_for_power(struct compiling * c,const node * n)2844 ast_for_power(struct compiling *c, const node *n)
2845 {
2846     /* power: atom trailer* ('**' factor)*
2847      */
2848     expr_ty e;
2849     REQ(n, power);
2850     e = ast_for_atom_expr(c, CHILD(n, 0));
2851     if (!e)
2852         return NULL;
2853     if (NCH(n) == 1)
2854         return e;
2855     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2856         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2857         if (!f)
2858             return NULL;
2859         e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2860                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2861     }
2862     return e;
2863 }
2864 
2865 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2866 ast_for_starred(struct compiling *c, const node *n)
2867 {
2868     expr_ty tmp;
2869     REQ(n, star_expr);
2870 
2871     tmp = ast_for_expr(c, CHILD(n, 1));
2872     if (!tmp)
2873         return NULL;
2874 
2875     /* The Load context is changed later. */
2876     return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2877                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2878 }
2879 
2880 
2881 /* Do not name a variable 'expr'!  Will cause a compile error.
2882 */
2883 
2884 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2885 ast_for_expr(struct compiling *c, const node *n)
2886 {
2887     /* handle the full range of simple expressions
2888        namedexpr_test: test [':=' test]
2889        test: or_test ['if' or_test 'else' test] | lambdef
2890        test_nocond: or_test | lambdef_nocond
2891        or_test: and_test ('or' and_test)*
2892        and_test: not_test ('and' not_test)*
2893        not_test: 'not' not_test | comparison
2894        comparison: expr (comp_op expr)*
2895        expr: xor_expr ('|' xor_expr)*
2896        xor_expr: and_expr ('^' and_expr)*
2897        and_expr: shift_expr ('&' shift_expr)*
2898        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2899        arith_expr: term (('+'|'-') term)*
2900        term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2901        factor: ('+'|'-'|'~') factor | power
2902        power: atom_expr ['**' factor]
2903        atom_expr: [AWAIT] atom trailer*
2904        yield_expr: 'yield' [yield_arg]
2905     */
2906 
2907     asdl_seq *seq;
2908     int i;
2909 
2910  loop:
2911     switch (TYPE(n)) {
2912         case namedexpr_test:
2913             if (NCH(n) == 3)
2914                 return ast_for_namedexpr(c, n);
2915             /* Fallthrough */
2916         case test:
2917         case test_nocond:
2918             if (TYPE(CHILD(n, 0)) == lambdef ||
2919                 TYPE(CHILD(n, 0)) == lambdef_nocond)
2920                 return ast_for_lambdef(c, CHILD(n, 0));
2921             else if (NCH(n) > 1)
2922                 return ast_for_ifexpr(c, n);
2923             /* Fallthrough */
2924         case or_test:
2925         case and_test:
2926             if (NCH(n) == 1) {
2927                 n = CHILD(n, 0);
2928                 goto loop;
2929             }
2930             seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2931             if (!seq)
2932                 return NULL;
2933             for (i = 0; i < NCH(n); i += 2) {
2934                 expr_ty e = ast_for_expr(c, CHILD(n, i));
2935                 if (!e)
2936                     return NULL;
2937                 asdl_seq_SET(seq, i / 2, e);
2938             }
2939             if (!strcmp(STR(CHILD(n, 1)), "and"))
2940                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2941                               n->n_end_lineno, n->n_end_col_offset,
2942                               c->c_arena);
2943             assert(!strcmp(STR(CHILD(n, 1)), "or"));
2944             return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
2945                           n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2946         case not_test:
2947             if (NCH(n) == 1) {
2948                 n = CHILD(n, 0);
2949                 goto loop;
2950             }
2951             else {
2952                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2953                 if (!expression)
2954                     return NULL;
2955 
2956                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2957                                n->n_end_lineno, n->n_end_col_offset,
2958                                c->c_arena);
2959             }
2960         case comparison:
2961             if (NCH(n) == 1) {
2962                 n = CHILD(n, 0);
2963                 goto loop;
2964             }
2965             else {
2966                 expr_ty expression;
2967                 asdl_int_seq *ops;
2968                 asdl_seq *cmps;
2969                 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2970                 if (!ops)
2971                     return NULL;
2972                 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2973                 if (!cmps) {
2974                     return NULL;
2975                 }
2976                 for (i = 1; i < NCH(n); i += 2) {
2977                     cmpop_ty newoperator;
2978 
2979                     newoperator = ast_for_comp_op(c, CHILD(n, i));
2980                     if (!newoperator) {
2981                         return NULL;
2982                     }
2983 
2984                     expression = ast_for_expr(c, CHILD(n, i + 1));
2985                     if (!expression) {
2986                         return NULL;
2987                     }
2988 
2989                     asdl_seq_SET(ops, i / 2, newoperator);
2990                     asdl_seq_SET(cmps, i / 2, expression);
2991                 }
2992                 expression = ast_for_expr(c, CHILD(n, 0));
2993                 if (!expression) {
2994                     return NULL;
2995                 }
2996 
2997                 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
2998                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2999             }
3000 
3001         case star_expr:
3002             return ast_for_starred(c, n);
3003         /* The next five cases all handle BinOps.  The main body of code
3004            is the same in each case, but the switch turned inside out to
3005            reuse the code for each type of operator.
3006          */
3007         case expr:
3008         case xor_expr:
3009         case and_expr:
3010         case shift_expr:
3011         case arith_expr:
3012         case term:
3013             if (NCH(n) == 1) {
3014                 n = CHILD(n, 0);
3015                 goto loop;
3016             }
3017             return ast_for_binop(c, n);
3018         case yield_expr: {
3019             node *an = NULL;
3020             node *en = NULL;
3021             int is_from = 0;
3022             expr_ty exp = NULL;
3023             if (NCH(n) > 1)
3024                 an = CHILD(n, 1); /* yield_arg */
3025             if (an) {
3026                 en = CHILD(an, NCH(an) - 1);
3027                 if (NCH(an) == 2) {
3028                     is_from = 1;
3029                     exp = ast_for_expr(c, en);
3030                 }
3031                 else
3032                     exp = ast_for_testlist(c, en);
3033                 if (!exp)
3034                     return NULL;
3035             }
3036             if (is_from)
3037                 return YieldFrom(exp, LINENO(n), n->n_col_offset,
3038                                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3039             return Yield(exp, LINENO(n), n->n_col_offset,
3040                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3041         }
3042         case factor:
3043             if (NCH(n) == 1) {
3044                 n = CHILD(n, 0);
3045                 goto loop;
3046             }
3047             return ast_for_factor(c, n);
3048         case power:
3049             return ast_for_power(c, n);
3050         default:
3051             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
3052             return NULL;
3053     }
3054     /* should never get here unless if error is set */
3055     return NULL;
3056 }
3057 
3058 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)3059 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3060              const node *start, const node *maybegenbeg, const node *closepar)
3061 {
3062     /*
3063       arglist: argument (',' argument)*  [',']
3064       argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3065     */
3066 
3067     int i, nargs, nkeywords;
3068     int ndoublestars;
3069     asdl_seq *args;
3070     asdl_seq *keywords;
3071 
3072     REQ(n, arglist);
3073 
3074     nargs = 0;
3075     nkeywords = 0;
3076     for (i = 0; i < NCH(n); i++) {
3077         node *ch = CHILD(n, i);
3078         if (TYPE(ch) == argument) {
3079             if (NCH(ch) == 1)
3080                 nargs++;
3081             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3082                 nargs++;
3083                 if (!maybegenbeg) {
3084                     ast_error(c, ch, "invalid syntax");
3085                     return NULL;
3086                 }
3087                 if (NCH(n) > 1) {
3088                     ast_error(c, ch, "Generator expression must be parenthesized");
3089                     return NULL;
3090                 }
3091             }
3092             else if (TYPE(CHILD(ch, 0)) == STAR)
3093                 nargs++;
3094             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3095                 nargs++;
3096             }
3097             else
3098                 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3099                 nkeywords++;
3100         }
3101     }
3102 
3103     args = _Py_asdl_seq_new(nargs, c->c_arena);
3104     if (!args)
3105         return NULL;
3106     keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3107     if (!keywords)
3108         return NULL;
3109 
3110     nargs = 0;  /* positional arguments + iterable argument unpackings */
3111     nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
3112     ndoublestars = 0;  /* just keyword argument unpackings */
3113     for (i = 0; i < NCH(n); i++) {
3114         node *ch = CHILD(n, i);
3115         if (TYPE(ch) == argument) {
3116             expr_ty e;
3117             node *chch = CHILD(ch, 0);
3118             if (NCH(ch) == 1) {
3119                 /* a positional argument */
3120                 if (nkeywords) {
3121                     if (ndoublestars) {
3122                         ast_error(c, chch,
3123                                   "positional argument follows "
3124                                   "keyword argument unpacking");
3125                     }
3126                     else {
3127                         ast_error(c, chch,
3128                                   "positional argument follows "
3129                                   "keyword argument");
3130                     }
3131                     return NULL;
3132                 }
3133                 e = ast_for_expr(c, chch);
3134                 if (!e)
3135                     return NULL;
3136                 asdl_seq_SET(args, nargs++, e);
3137             }
3138             else if (TYPE(chch) == STAR) {
3139                 /* an iterable argument unpacking */
3140                 expr_ty starred;
3141                 if (ndoublestars) {
3142                     ast_error(c, chch,
3143                               "iterable argument unpacking follows "
3144                               "keyword argument unpacking");
3145                     return NULL;
3146                 }
3147                 e = ast_for_expr(c, CHILD(ch, 1));
3148                 if (!e)
3149                     return NULL;
3150                 starred = Starred(e, Load, LINENO(chch),
3151                         chch->n_col_offset,
3152                         e->end_lineno, e->end_col_offset,
3153                         c->c_arena);
3154                 if (!starred)
3155                     return NULL;
3156                 asdl_seq_SET(args, nargs++, starred);
3157 
3158             }
3159             else if (TYPE(chch) == DOUBLESTAR) {
3160                 /* a keyword argument unpacking */
3161                 keyword_ty kw;
3162                 i++;
3163                 e = ast_for_expr(c, CHILD(ch, 1));
3164                 if (!e)
3165                     return NULL;
3166                 kw = keyword(NULL, e, c->c_arena);
3167                 asdl_seq_SET(keywords, nkeywords++, kw);
3168                 ndoublestars++;
3169             }
3170             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3171                 /* the lone generator expression */
3172                 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3173                 if (!e)
3174                     return NULL;
3175                 asdl_seq_SET(args, nargs++, e);
3176             }
3177             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3178                 /* treat colon equal as positional argument */
3179                 if (nkeywords) {
3180                     if (ndoublestars) {
3181                         ast_error(c, chch,
3182                                   "positional argument follows "
3183                                   "keyword argument unpacking");
3184                     }
3185                     else {
3186                         ast_error(c, chch,
3187                                   "positional argument follows "
3188                                   "keyword argument");
3189                     }
3190                     return NULL;
3191                 }
3192                 e = ast_for_namedexpr(c, ch);
3193                 if (!e)
3194                     return NULL;
3195                 asdl_seq_SET(args, nargs++, e);
3196             }
3197             else {
3198                 /* a keyword argument */
3199                 keyword_ty kw;
3200                 identifier key, tmp;
3201                 int k;
3202 
3203                 // To remain LL(1), the grammar accepts any test (basically, any
3204                 // expression) in the keyword slot of a call site.  So, we need
3205                 // to manually enforce that the keyword is a NAME here.
3206                 static const int name_tree[] = {
3207                     test,
3208                     or_test,
3209                     and_test,
3210                     not_test,
3211                     comparison,
3212                     expr,
3213                     xor_expr,
3214                     and_expr,
3215                     shift_expr,
3216                     arith_expr,
3217                     term,
3218                     factor,
3219                     power,
3220                     atom_expr,
3221                     atom,
3222                     0,
3223                 };
3224                 node *expr_node = chch;
3225                 for (int i = 0; name_tree[i]; i++) {
3226                     if (TYPE(expr_node) != name_tree[i])
3227                         break;
3228                     if (NCH(expr_node) != 1)
3229                         break;
3230                     expr_node = CHILD(expr_node, 0);
3231                 }
3232                 if (TYPE(expr_node) != NAME) {
3233                     ast_error(c, chch,
3234                               "expression cannot contain assignment, "
3235                               "perhaps you meant \"==\"?");
3236                     return NULL;
3237                 }
3238                 key = new_identifier(STR(expr_node), c);
3239                 if (key == NULL) {
3240                     return NULL;
3241                 }
3242                 if (forbidden_name(c, key, chch, 1)) {
3243                     return NULL;
3244                 }
3245                 for (k = 0; k < nkeywords; k++) {
3246                     tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3247                     if (tmp && !PyUnicode_Compare(tmp, key)) {
3248                         ast_error(c, chch,
3249                                   "keyword argument repeated");
3250                         return NULL;
3251                     }
3252                 }
3253                 e = ast_for_expr(c, CHILD(ch, 2));
3254                 if (!e)
3255                     return NULL;
3256                 kw = keyword(key, e, c->c_arena);
3257                 if (!kw)
3258                     return NULL;
3259                 asdl_seq_SET(keywords, nkeywords++, kw);
3260             }
3261         }
3262     }
3263 
3264     return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3265                 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3266 }
3267 
3268 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3269 ast_for_testlist(struct compiling *c, const node* n)
3270 {
3271     /* testlist_comp: test (comp_for | (',' test)* [',']) */
3272     /* testlist: test (',' test)* [','] */
3273     assert(NCH(n) > 0);
3274     if (TYPE(n) == testlist_comp) {
3275         if (NCH(n) > 1)
3276             assert(TYPE(CHILD(n, 1)) != comp_for);
3277     }
3278     else {
3279         assert(TYPE(n) == testlist ||
3280                TYPE(n) == testlist_star_expr);
3281     }
3282     if (NCH(n) == 1)
3283         return ast_for_expr(c, CHILD(n, 0));
3284     else {
3285         asdl_seq *tmp = seq_for_testlist(c, n);
3286         if (!tmp)
3287             return NULL;
3288         return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3289                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3290     }
3291 }
3292 
3293 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3294 ast_for_expr_stmt(struct compiling *c, const node *n)
3295 {
3296     REQ(n, expr_stmt);
3297     /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3298                      [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3299        annassign: ':' test ['=' (yield_expr|testlist)]
3300        testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3301        augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3302                    '<<=' | '>>=' | '**=' | '//=')
3303        test: ... here starts the operator precedence dance
3304      */
3305     int num = NCH(n);
3306 
3307     if (num == 1) {
3308         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3309         if (!e)
3310             return NULL;
3311 
3312         return Expr(e, LINENO(n), n->n_col_offset,
3313                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3314     }
3315     else if (TYPE(CHILD(n, 1)) == augassign) {
3316         expr_ty expr1, expr2;
3317         operator_ty newoperator;
3318         node *ch = CHILD(n, 0);
3319 
3320         expr1 = ast_for_testlist(c, ch);
3321         if (!expr1)
3322             return NULL;
3323         if(!set_context(c, expr1, Store, ch))
3324             return NULL;
3325         /* set_context checks that most expressions are not the left side.
3326           Augmented assignments can only have a name, a subscript, or an
3327           attribute on the left, though, so we have to explicitly check for
3328           those. */
3329         switch (expr1->kind) {
3330             case Name_kind:
3331             case Attribute_kind:
3332             case Subscript_kind:
3333                 break;
3334             default:
3335                 ast_error(c, ch, "illegal expression for augmented assignment");
3336                 return NULL;
3337         }
3338 
3339         ch = CHILD(n, 2);
3340         if (TYPE(ch) == testlist)
3341             expr2 = ast_for_testlist(c, ch);
3342         else
3343             expr2 = ast_for_expr(c, ch);
3344         if (!expr2)
3345             return NULL;
3346 
3347         newoperator = ast_for_augassign(c, CHILD(n, 1));
3348         if (!newoperator)
3349             return NULL;
3350 
3351         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3352                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3353     }
3354     else if (TYPE(CHILD(n, 1)) == annassign) {
3355         expr_ty expr1, expr2, expr3;
3356         node *ch = CHILD(n, 0);
3357         node *deep, *ann = CHILD(n, 1);
3358         int simple = 1;
3359 
3360         /* AnnAssigns are only allowed in Python 3.6 or greater */
3361         if (c->c_feature_version < 6) {
3362             ast_error(c, ch,
3363                       "Variable annotation syntax is only supported in Python 3.6 and greater");
3364             return NULL;
3365         }
3366 
3367         /* we keep track of parens to qualify (x) as expression not name */
3368         deep = ch;
3369         while (NCH(deep) == 1) {
3370             deep = CHILD(deep, 0);
3371         }
3372         if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3373             simple = 0;
3374         }
3375         expr1 = ast_for_testlist(c, ch);
3376         if (!expr1) {
3377             return NULL;
3378         }
3379         switch (expr1->kind) {
3380             case Name_kind:
3381                 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3382                     return NULL;
3383                 }
3384                 expr1->v.Name.ctx = Store;
3385                 break;
3386             case Attribute_kind:
3387                 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3388                     return NULL;
3389                 }
3390                 expr1->v.Attribute.ctx = Store;
3391                 break;
3392             case Subscript_kind:
3393                 expr1->v.Subscript.ctx = Store;
3394                 break;
3395             case List_kind:
3396                 ast_error(c, ch,
3397                           "only single target (not list) can be annotated");
3398                 return NULL;
3399             case Tuple_kind:
3400                 ast_error(c, ch,
3401                           "only single target (not tuple) can be annotated");
3402                 return NULL;
3403             default:
3404                 ast_error(c, ch,
3405                           "illegal target for annotation");
3406                 return NULL;
3407         }
3408 
3409         if (expr1->kind != Name_kind) {
3410             simple = 0;
3411         }
3412         ch = CHILD(ann, 1);
3413         expr2 = ast_for_expr(c, ch);
3414         if (!expr2) {
3415             return NULL;
3416         }
3417         if (NCH(ann) == 2) {
3418             return AnnAssign(expr1, expr2, NULL, simple,
3419                              LINENO(n), n->n_col_offset,
3420                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3421         }
3422         else {
3423             ch = CHILD(ann, 3);
3424             if (TYPE(ch) == testlist_star_expr) {
3425                 expr3 = ast_for_testlist(c, ch);
3426             }
3427             else {
3428                 expr3 = ast_for_expr(c, ch);
3429             }
3430             if (!expr3) {
3431                 return NULL;
3432             }
3433             return AnnAssign(expr1, expr2, expr3, simple,
3434                              LINENO(n), n->n_col_offset,
3435                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3436         }
3437     }
3438     else {
3439         int i, nch_minus_type, has_type_comment;
3440         asdl_seq *targets;
3441         node *value;
3442         expr_ty expression;
3443         string type_comment;
3444 
3445         /* a normal assignment */
3446         REQ(CHILD(n, 1), EQUAL);
3447 
3448         has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3449         nch_minus_type = num - has_type_comment;
3450 
3451         targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3452         if (!targets)
3453             return NULL;
3454         for (i = 0; i < nch_minus_type - 2; i += 2) {
3455             expr_ty e;
3456             node *ch = CHILD(n, i);
3457             if (TYPE(ch) == yield_expr) {
3458                 ast_error(c, ch, "assignment to yield expression not possible");
3459                 return NULL;
3460             }
3461             e = ast_for_testlist(c, ch);
3462             if (!e)
3463               return NULL;
3464 
3465             /* set context to assign */
3466             if (!set_context(c, e, Store, CHILD(n, i)))
3467               return NULL;
3468 
3469             asdl_seq_SET(targets, i / 2, e);
3470         }
3471         value = CHILD(n, nch_minus_type - 1);
3472         if (TYPE(value) == testlist_star_expr)
3473             expression = ast_for_testlist(c, value);
3474         else
3475             expression = ast_for_expr(c, value);
3476         if (!expression)
3477             return NULL;
3478         if (has_type_comment) {
3479             type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3480             if (!type_comment)
3481                 return NULL;
3482         }
3483         else
3484             type_comment = NULL;
3485         return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3486                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3487     }
3488 }
3489 
3490 
3491 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3492 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3493 {
3494     asdl_seq *seq;
3495     int i;
3496     expr_ty e;
3497 
3498     REQ(n, exprlist);
3499 
3500     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3501     if (!seq)
3502         return NULL;
3503     for (i = 0; i < NCH(n); i += 2) {
3504         e = ast_for_expr(c, CHILD(n, i));
3505         if (!e)
3506             return NULL;
3507         asdl_seq_SET(seq, i / 2, e);
3508         if (context && !set_context(c, e, context, CHILD(n, i)))
3509             return NULL;
3510     }
3511     return seq;
3512 }
3513 
3514 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3515 ast_for_del_stmt(struct compiling *c, const node *n)
3516 {
3517     asdl_seq *expr_list;
3518 
3519     /* del_stmt: 'del' exprlist */
3520     REQ(n, del_stmt);
3521 
3522     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3523     if (!expr_list)
3524         return NULL;
3525     return Delete(expr_list, LINENO(n), n->n_col_offset,
3526                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3527 }
3528 
3529 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3530 ast_for_flow_stmt(struct compiling *c, const node *n)
3531 {
3532     /*
3533       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3534                  | yield_stmt
3535       break_stmt: 'break'
3536       continue_stmt: 'continue'
3537       return_stmt: 'return' [testlist]
3538       yield_stmt: yield_expr
3539       yield_expr: 'yield' testlist | 'yield' 'from' test
3540       raise_stmt: 'raise' [test [',' test [',' test]]]
3541     */
3542     node *ch;
3543 
3544     REQ(n, flow_stmt);
3545     ch = CHILD(n, 0);
3546     switch (TYPE(ch)) {
3547         case break_stmt:
3548             return Break(LINENO(n), n->n_col_offset,
3549                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3550         case continue_stmt:
3551             return Continue(LINENO(n), n->n_col_offset,
3552                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3553         case yield_stmt: { /* will reduce to yield_expr */
3554             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3555             if (!exp)
3556                 return NULL;
3557             return Expr(exp, LINENO(n), n->n_col_offset,
3558                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3559         }
3560         case return_stmt:
3561             if (NCH(ch) == 1)
3562                 return Return(NULL, LINENO(n), n->n_col_offset,
3563                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3564             else {
3565                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3566                 if (!expression)
3567                     return NULL;
3568                 return Return(expression, LINENO(n), n->n_col_offset,
3569                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3570             }
3571         case raise_stmt:
3572             if (NCH(ch) == 1)
3573                 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3574                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3575             else if (NCH(ch) >= 2) {
3576                 expr_ty cause = NULL;
3577                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3578                 if (!expression)
3579                     return NULL;
3580                 if (NCH(ch) == 4) {
3581                     cause = ast_for_expr(c, CHILD(ch, 3));
3582                     if (!cause)
3583                         return NULL;
3584                 }
3585                 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3586                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3587             }
3588             /* fall through */
3589         default:
3590             PyErr_Format(PyExc_SystemError,
3591                          "unexpected flow_stmt: %d", TYPE(ch));
3592             return NULL;
3593     }
3594 }
3595 
3596 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3597 alias_for_import_name(struct compiling *c, const node *n, int store)
3598 {
3599     /*
3600       import_as_name: NAME ['as' NAME]
3601       dotted_as_name: dotted_name ['as' NAME]
3602       dotted_name: NAME ('.' NAME)*
3603     */
3604     identifier str, name;
3605 
3606  loop:
3607     switch (TYPE(n)) {
3608         case import_as_name: {
3609             node *name_node = CHILD(n, 0);
3610             str = NULL;
3611             name = NEW_IDENTIFIER(name_node);
3612             if (!name)
3613                 return NULL;
3614             if (NCH(n) == 3) {
3615                 node *str_node = CHILD(n, 2);
3616                 str = NEW_IDENTIFIER(str_node);
3617                 if (!str)
3618                     return NULL;
3619                 if (store && forbidden_name(c, str, str_node, 0))
3620                     return NULL;
3621             }
3622             else {
3623                 if (forbidden_name(c, name, name_node, 0))
3624                     return NULL;
3625             }
3626             return alias(name, str, c->c_arena);
3627         }
3628         case dotted_as_name:
3629             if (NCH(n) == 1) {
3630                 n = CHILD(n, 0);
3631                 goto loop;
3632             }
3633             else {
3634                 node *asname_node = CHILD(n, 2);
3635                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3636                 if (!a)
3637                     return NULL;
3638                 assert(!a->asname);
3639                 a->asname = NEW_IDENTIFIER(asname_node);
3640                 if (!a->asname)
3641                     return NULL;
3642                 if (forbidden_name(c, a->asname, asname_node, 0))
3643                     return NULL;
3644                 return a;
3645             }
3646         case dotted_name:
3647             if (NCH(n) == 1) {
3648                 node *name_node = CHILD(n, 0);
3649                 name = NEW_IDENTIFIER(name_node);
3650                 if (!name)
3651                     return NULL;
3652                 if (store && forbidden_name(c, name, name_node, 0))
3653                     return NULL;
3654                 return alias(name, NULL, c->c_arena);
3655             }
3656             else {
3657                 /* Create a string of the form "a.b.c" */
3658                 int i;
3659                 size_t len;
3660                 char *s;
3661                 PyObject *uni;
3662 
3663                 len = 0;
3664                 for (i = 0; i < NCH(n); i += 2)
3665                     /* length of string plus one for the dot */
3666                     len += strlen(STR(CHILD(n, i))) + 1;
3667                 len--; /* the last name doesn't have a dot */
3668                 str = PyBytes_FromStringAndSize(NULL, len);
3669                 if (!str)
3670                     return NULL;
3671                 s = PyBytes_AS_STRING(str);
3672                 if (!s)
3673                     return NULL;
3674                 for (i = 0; i < NCH(n); i += 2) {
3675                     char *sch = STR(CHILD(n, i));
3676                     strcpy(s, STR(CHILD(n, i)));
3677                     s += strlen(sch);
3678                     *s++ = '.';
3679                 }
3680                 --s;
3681                 *s = '\0';
3682                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3683                                            PyBytes_GET_SIZE(str),
3684                                            NULL);
3685                 Py_DECREF(str);
3686                 if (!uni)
3687                     return NULL;
3688                 str = uni;
3689                 PyUnicode_InternInPlace(&str);
3690                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3691                     Py_DECREF(str);
3692                     return NULL;
3693                 }
3694                 return alias(str, NULL, c->c_arena);
3695             }
3696         case STAR:
3697             str = PyUnicode_InternFromString("*");
3698             if (!str)
3699                 return NULL;
3700             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3701                 Py_DECREF(str);
3702                 return NULL;
3703             }
3704             return alias(str, NULL, c->c_arena);
3705         default:
3706             PyErr_Format(PyExc_SystemError,
3707                          "unexpected import name: %d", TYPE(n));
3708             return NULL;
3709     }
3710 
3711     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3712     return NULL;
3713 }
3714 
3715 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3716 ast_for_import_stmt(struct compiling *c, const node *n)
3717 {
3718     /*
3719       import_stmt: import_name | import_from
3720       import_name: 'import' dotted_as_names
3721       import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3722                    'import' ('*' | '(' import_as_names ')' | import_as_names)
3723     */
3724     int lineno;
3725     int col_offset;
3726     int i;
3727     asdl_seq *aliases;
3728 
3729     REQ(n, import_stmt);
3730     lineno = LINENO(n);
3731     col_offset = n->n_col_offset;
3732     n = CHILD(n, 0);
3733     if (TYPE(n) == import_name) {
3734         n = CHILD(n, 1);
3735         REQ(n, dotted_as_names);
3736         aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3737         if (!aliases)
3738                 return NULL;
3739         for (i = 0; i < NCH(n); i += 2) {
3740             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3741             if (!import_alias)
3742                 return NULL;
3743             asdl_seq_SET(aliases, i / 2, import_alias);
3744         }
3745         // Even though n is modified above, the end position is not changed
3746         return Import(aliases, lineno, col_offset,
3747                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3748     }
3749     else if (TYPE(n) == import_from) {
3750         int n_children;
3751         int idx, ndots = 0;
3752         const node *n_copy = n;
3753         alias_ty mod = NULL;
3754         identifier modname = NULL;
3755 
3756        /* Count the number of dots (for relative imports) and check for the
3757           optional module name */
3758         for (idx = 1; idx < NCH(n); idx++) {
3759             if (TYPE(CHILD(n, idx)) == dotted_name) {
3760                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3761                 if (!mod)
3762                     return NULL;
3763                 idx++;
3764                 break;
3765             } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3766                 /* three consecutive dots are tokenized as one ELLIPSIS */
3767                 ndots += 3;
3768                 continue;
3769             } else if (TYPE(CHILD(n, idx)) != DOT) {
3770                 break;
3771             }
3772             ndots++;
3773         }
3774         idx++; /* skip over the 'import' keyword */
3775         switch (TYPE(CHILD(n, idx))) {
3776         case STAR:
3777             /* from ... import * */
3778             n = CHILD(n, idx);
3779             n_children = 1;
3780             break;
3781         case LPAR:
3782             /* from ... import (x, y, z) */
3783             n = CHILD(n, idx + 1);
3784             n_children = NCH(n);
3785             break;
3786         case import_as_names:
3787             /* from ... import x, y, z */
3788             n = CHILD(n, idx);
3789             n_children = NCH(n);
3790             if (n_children % 2 == 0) {
3791                 ast_error(c, n,
3792                           "trailing comma not allowed without"
3793                           " surrounding parentheses");
3794                 return NULL;
3795             }
3796             break;
3797         default:
3798             ast_error(c, n, "Unexpected node-type in from-import");
3799             return NULL;
3800         }
3801 
3802         aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3803         if (!aliases)
3804             return NULL;
3805 
3806         /* handle "from ... import *" special b/c there's no children */
3807         if (TYPE(n) == STAR) {
3808             alias_ty import_alias = alias_for_import_name(c, n, 1);
3809             if (!import_alias)
3810                 return NULL;
3811             asdl_seq_SET(aliases, 0, import_alias);
3812         }
3813         else {
3814             for (i = 0; i < NCH(n); i += 2) {
3815                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3816                 if (!import_alias)
3817                     return NULL;
3818                 asdl_seq_SET(aliases, i / 2, import_alias);
3819             }
3820         }
3821         if (mod != NULL)
3822             modname = mod->name;
3823         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3824                           n_copy->n_end_lineno, n_copy->n_end_col_offset,
3825                           c->c_arena);
3826     }
3827     PyErr_Format(PyExc_SystemError,
3828                  "unknown import statement: starts with command '%s'",
3829                  STR(CHILD(n, 0)));
3830     return NULL;
3831 }
3832 
3833 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3834 ast_for_global_stmt(struct compiling *c, const node *n)
3835 {
3836     /* global_stmt: 'global' NAME (',' NAME)* */
3837     identifier name;
3838     asdl_seq *s;
3839     int i;
3840 
3841     REQ(n, global_stmt);
3842     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3843     if (!s)
3844         return NULL;
3845     for (i = 1; i < NCH(n); i += 2) {
3846         name = NEW_IDENTIFIER(CHILD(n, i));
3847         if (!name)
3848             return NULL;
3849         asdl_seq_SET(s, i / 2, name);
3850     }
3851     return Global(s, LINENO(n), n->n_col_offset,
3852                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3853 }
3854 
3855 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3856 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3857 {
3858     /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3859     identifier name;
3860     asdl_seq *s;
3861     int i;
3862 
3863     REQ(n, nonlocal_stmt);
3864     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3865     if (!s)
3866         return NULL;
3867     for (i = 1; i < NCH(n); i += 2) {
3868         name = NEW_IDENTIFIER(CHILD(n, i));
3869         if (!name)
3870             return NULL;
3871         asdl_seq_SET(s, i / 2, name);
3872     }
3873     return Nonlocal(s, LINENO(n), n->n_col_offset,
3874                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3875 }
3876 
3877 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3878 ast_for_assert_stmt(struct compiling *c, const node *n)
3879 {
3880     /* assert_stmt: 'assert' test [',' test] */
3881     REQ(n, assert_stmt);
3882     if (NCH(n) == 2) {
3883         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3884         if (!expression)
3885             return NULL;
3886         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3887                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3888     }
3889     else if (NCH(n) == 4) {
3890         expr_ty expr1, expr2;
3891 
3892         expr1 = ast_for_expr(c, CHILD(n, 1));
3893         if (!expr1)
3894             return NULL;
3895         expr2 = ast_for_expr(c, CHILD(n, 3));
3896         if (!expr2)
3897             return NULL;
3898 
3899         return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3900                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3901     }
3902     PyErr_Format(PyExc_SystemError,
3903                  "improper number of parts to 'assert' statement: %d",
3904                  NCH(n));
3905     return NULL;
3906 }
3907 
3908 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3909 ast_for_suite(struct compiling *c, const node *n)
3910 {
3911     /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3912     asdl_seq *seq;
3913     stmt_ty s;
3914     int i, total, num, end, pos = 0;
3915     node *ch;
3916 
3917     if (TYPE(n) != func_body_suite) {
3918         REQ(n, suite);
3919     }
3920 
3921     total = num_stmts(n);
3922     seq = _Py_asdl_seq_new(total, c->c_arena);
3923     if (!seq)
3924         return NULL;
3925     if (TYPE(CHILD(n, 0)) == simple_stmt) {
3926         n = CHILD(n, 0);
3927         /* simple_stmt always ends with a NEWLINE,
3928            and may have a trailing SEMI
3929         */
3930         end = NCH(n) - 1;
3931         if (TYPE(CHILD(n, end - 1)) == SEMI)
3932             end--;
3933         /* loop by 2 to skip semi-colons */
3934         for (i = 0; i < end; i += 2) {
3935             ch = CHILD(n, i);
3936             s = ast_for_stmt(c, ch);
3937             if (!s)
3938                 return NULL;
3939             asdl_seq_SET(seq, pos++, s);
3940         }
3941     }
3942     else {
3943         i = 2;
3944         if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3945             i += 2;
3946             REQ(CHILD(n, 2), NEWLINE);
3947         }
3948 
3949         for (; i < (NCH(n) - 1); i++) {
3950             ch = CHILD(n, i);
3951             REQ(ch, stmt);
3952             num = num_stmts(ch);
3953             if (num == 1) {
3954                 /* small_stmt or compound_stmt with only one child */
3955                 s = ast_for_stmt(c, ch);
3956                 if (!s)
3957                     return NULL;
3958                 asdl_seq_SET(seq, pos++, s);
3959             }
3960             else {
3961                 int j;
3962                 ch = CHILD(ch, 0);
3963                 REQ(ch, simple_stmt);
3964                 for (j = 0; j < NCH(ch); j += 2) {
3965                     /* statement terminates with a semi-colon ';' */
3966                     if (NCH(CHILD(ch, j)) == 0) {
3967                         assert((j + 1) == NCH(ch));
3968                         break;
3969                     }
3970                     s = ast_for_stmt(c, CHILD(ch, j));
3971                     if (!s)
3972                         return NULL;
3973                     asdl_seq_SET(seq, pos++, s);
3974                 }
3975             }
3976         }
3977     }
3978     assert(pos == seq->size);
3979     return seq;
3980 }
3981 
3982 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3983 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3984 {
3985     Py_ssize_t tot = asdl_seq_LEN(s);
3986     // There must be no empty suites.
3987     assert(tot > 0);
3988     stmt_ty last = asdl_seq_GET(s, tot - 1);
3989     *end_lineno = last->end_lineno;
3990     *end_col_offset = last->end_col_offset;
3991 }
3992 
3993 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3994 ast_for_if_stmt(struct compiling *c, const node *n)
3995 {
3996     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3997        ['else' ':' suite]
3998     */
3999     char *s;
4000     int end_lineno, end_col_offset;
4001 
4002     REQ(n, if_stmt);
4003 
4004     if (NCH(n) == 4) {
4005         expr_ty expression;
4006         asdl_seq *suite_seq;
4007 
4008         expression = ast_for_expr(c, CHILD(n, 1));
4009         if (!expression)
4010             return NULL;
4011         suite_seq = ast_for_suite(c, CHILD(n, 3));
4012         if (!suite_seq)
4013             return NULL;
4014         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4015 
4016         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4017                   end_lineno, end_col_offset, c->c_arena);
4018     }
4019 
4020     s = STR(CHILD(n, 4));
4021     /* s[2], the third character in the string, will be
4022        's' for el_s_e, or
4023        'i' for el_i_f
4024     */
4025     if (s[2] == 's') {
4026         expr_ty expression;
4027         asdl_seq *seq1, *seq2;
4028 
4029         expression = ast_for_expr(c, CHILD(n, 1));
4030         if (!expression)
4031             return NULL;
4032         seq1 = ast_for_suite(c, CHILD(n, 3));
4033         if (!seq1)
4034             return NULL;
4035         seq2 = ast_for_suite(c, CHILD(n, 6));
4036         if (!seq2)
4037             return NULL;
4038         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4039 
4040         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4041                   end_lineno, end_col_offset, c->c_arena);
4042     }
4043     else if (s[2] == 'i') {
4044         int i, n_elif, has_else = 0;
4045         expr_ty expression;
4046         asdl_seq *suite_seq;
4047         asdl_seq *orelse = NULL;
4048         n_elif = NCH(n) - 4;
4049         /* must reference the child n_elif+1 since 'else' token is third,
4050            not fourth, child from the end. */
4051         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4052             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4053             has_else = 1;
4054             n_elif -= 3;
4055         }
4056         n_elif /= 4;
4057 
4058         if (has_else) {
4059             asdl_seq *suite_seq2;
4060 
4061             orelse = _Py_asdl_seq_new(1, c->c_arena);
4062             if (!orelse)
4063                 return NULL;
4064             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4065             if (!expression)
4066                 return NULL;
4067             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4068             if (!suite_seq)
4069                 return NULL;
4070             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4071             if (!suite_seq2)
4072                 return NULL;
4073             get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4074 
4075             asdl_seq_SET(orelse, 0,
4076                          If(expression, suite_seq, suite_seq2,
4077                             LINENO(CHILD(n, NCH(n) - 7)),
4078                             CHILD(n, NCH(n) - 7)->n_col_offset,
4079                             end_lineno, end_col_offset, c->c_arena));
4080             /* the just-created orelse handled the last elif */
4081             n_elif--;
4082         }
4083 
4084         for (i = 0; i < n_elif; i++) {
4085             int off = 5 + (n_elif - i - 1) * 4;
4086             asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4087             if (!newobj)
4088                 return NULL;
4089             expression = ast_for_expr(c, CHILD(n, off));
4090             if (!expression)
4091                 return NULL;
4092             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4093             if (!suite_seq)
4094                 return NULL;
4095 
4096             if (orelse != NULL) {
4097                 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4098             } else {
4099                 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4100             }
4101             asdl_seq_SET(newobj, 0,
4102                          If(expression, suite_seq, orelse,
4103                             LINENO(CHILD(n, off - 1)),
4104                             CHILD(n, off - 1)->n_col_offset,
4105                             end_lineno, end_col_offset, c->c_arena));
4106             orelse = newobj;
4107         }
4108         expression = ast_for_expr(c, CHILD(n, 1));
4109         if (!expression)
4110             return NULL;
4111         suite_seq = ast_for_suite(c, CHILD(n, 3));
4112         if (!suite_seq)
4113             return NULL;
4114         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4115         return If(expression, suite_seq, orelse,
4116                   LINENO(n), n->n_col_offset,
4117                   end_lineno, end_col_offset, c->c_arena);
4118     }
4119 
4120     PyErr_Format(PyExc_SystemError,
4121                  "unexpected token in 'if' statement: %s", s);
4122     return NULL;
4123 }
4124 
4125 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4126 ast_for_while_stmt(struct compiling *c, const node *n)
4127 {
4128     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4129     REQ(n, while_stmt);
4130     int end_lineno, end_col_offset;
4131 
4132     if (NCH(n) == 4) {
4133         expr_ty expression;
4134         asdl_seq *suite_seq;
4135 
4136         expression = ast_for_expr(c, CHILD(n, 1));
4137         if (!expression)
4138             return NULL;
4139         suite_seq = ast_for_suite(c, CHILD(n, 3));
4140         if (!suite_seq)
4141             return NULL;
4142         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4143         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4144                      end_lineno, end_col_offset, c->c_arena);
4145     }
4146     else if (NCH(n) == 7) {
4147         expr_ty expression;
4148         asdl_seq *seq1, *seq2;
4149 
4150         expression = ast_for_expr(c, CHILD(n, 1));
4151         if (!expression)
4152             return NULL;
4153         seq1 = ast_for_suite(c, CHILD(n, 3));
4154         if (!seq1)
4155             return NULL;
4156         seq2 = ast_for_suite(c, CHILD(n, 6));
4157         if (!seq2)
4158             return NULL;
4159         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4160 
4161         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4162                      end_lineno, end_col_offset, c->c_arena);
4163     }
4164 
4165     PyErr_Format(PyExc_SystemError,
4166                  "wrong number of tokens for 'while' statement: %d",
4167                  NCH(n));
4168     return NULL;
4169 }
4170 
4171 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4172 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4173 {
4174     const node * const n = is_async ? CHILD(n0, 1) : n0;
4175     asdl_seq *_target, *seq = NULL, *suite_seq;
4176     expr_ty expression;
4177     expr_ty target, first;
4178     const node *node_target;
4179     int end_lineno, end_col_offset;
4180     int has_type_comment;
4181     string type_comment;
4182 
4183     if (is_async && c->c_feature_version < 5) {
4184         ast_error(c, n,
4185                   "Async for loops are only supported in Python 3.5 and greater");
4186         return NULL;
4187     }
4188 
4189     /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4190     REQ(n, for_stmt);
4191 
4192     has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4193 
4194     if (NCH(n) == 9 + has_type_comment) {
4195         seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4196         if (!seq)
4197             return NULL;
4198     }
4199 
4200     node_target = CHILD(n, 1);
4201     _target = ast_for_exprlist(c, node_target, Store);
4202     if (!_target)
4203         return NULL;
4204     /* Check the # of children rather than the length of _target, since
4205        for x, in ... has 1 element in _target, but still requires a Tuple. */
4206     first = (expr_ty)asdl_seq_GET(_target, 0);
4207     if (NCH(node_target) == 1)
4208         target = first;
4209     else
4210         target = Tuple(_target, Store, first->lineno, first->col_offset,
4211                        node_target->n_end_lineno, node_target->n_end_col_offset,
4212                        c->c_arena);
4213 
4214     expression = ast_for_testlist(c, CHILD(n, 3));
4215     if (!expression)
4216         return NULL;
4217     suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4218     if (!suite_seq)
4219         return NULL;
4220 
4221     if (seq != NULL) {
4222         get_last_end_pos(seq, &end_lineno, &end_col_offset);
4223     } else {
4224         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4225     }
4226 
4227     if (has_type_comment) {
4228         type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4229         if (!type_comment)
4230             return NULL;
4231     }
4232     else
4233         type_comment = NULL;
4234 
4235     if (is_async)
4236         return AsyncFor(target, expression, suite_seq, seq, type_comment,
4237                         LINENO(n0), n0->n_col_offset,
4238                         end_lineno, end_col_offset, c->c_arena);
4239     else
4240         return For(target, expression, suite_seq, seq, type_comment,
4241                    LINENO(n), n->n_col_offset,
4242                    end_lineno, end_col_offset, c->c_arena);
4243 }
4244 
4245 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4246 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4247 {
4248     /* except_clause: 'except' [test ['as' test]] */
4249     int end_lineno, end_col_offset;
4250     REQ(exc, except_clause);
4251     REQ(body, suite);
4252 
4253     if (NCH(exc) == 1) {
4254         asdl_seq *suite_seq = ast_for_suite(c, body);
4255         if (!suite_seq)
4256             return NULL;
4257         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4258 
4259         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4260                              exc->n_col_offset,
4261                              end_lineno, end_col_offset, c->c_arena);
4262     }
4263     else if (NCH(exc) == 2) {
4264         expr_ty expression;
4265         asdl_seq *suite_seq;
4266 
4267         expression = ast_for_expr(c, CHILD(exc, 1));
4268         if (!expression)
4269             return NULL;
4270         suite_seq = ast_for_suite(c, body);
4271         if (!suite_seq)
4272             return NULL;
4273         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4274 
4275         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4276                              exc->n_col_offset,
4277                              end_lineno, end_col_offset, c->c_arena);
4278     }
4279     else if (NCH(exc) == 4) {
4280         asdl_seq *suite_seq;
4281         expr_ty expression;
4282         identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4283         if (!e)
4284             return NULL;
4285         if (forbidden_name(c, e, CHILD(exc, 3), 0))
4286             return NULL;
4287         expression = ast_for_expr(c, CHILD(exc, 1));
4288         if (!expression)
4289             return NULL;
4290         suite_seq = ast_for_suite(c, body);
4291         if (!suite_seq)
4292             return NULL;
4293         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4294 
4295         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4296                              exc->n_col_offset,
4297                              end_lineno, end_col_offset, c->c_arena);
4298     }
4299 
4300     PyErr_Format(PyExc_SystemError,
4301                  "wrong number of children for 'except' clause: %d",
4302                  NCH(exc));
4303     return NULL;
4304 }
4305 
4306 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4307 ast_for_try_stmt(struct compiling *c, const node *n)
4308 {
4309     const int nch = NCH(n);
4310     int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4311     asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4312     excepthandler_ty last_handler;
4313 
4314     REQ(n, try_stmt);
4315 
4316     body = ast_for_suite(c, CHILD(n, 2));
4317     if (body == NULL)
4318         return NULL;
4319 
4320     if (TYPE(CHILD(n, nch - 3)) == NAME) {
4321         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4322             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4323                 /* we can assume it's an "else",
4324                    because nch >= 9 for try-else-finally and
4325                    it would otherwise have a type of except_clause */
4326                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4327                 if (orelse == NULL)
4328                     return NULL;
4329                 n_except--;
4330             }
4331 
4332             finally = ast_for_suite(c, CHILD(n, nch - 1));
4333             if (finally == NULL)
4334                 return NULL;
4335             n_except--;
4336         }
4337         else {
4338             /* we can assume it's an "else",
4339                otherwise it would have a type of except_clause */
4340             orelse = ast_for_suite(c, CHILD(n, nch - 1));
4341             if (orelse == NULL)
4342                 return NULL;
4343             n_except--;
4344         }
4345     }
4346     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4347         ast_error(c, n, "malformed 'try' statement");
4348         return NULL;
4349     }
4350 
4351     if (n_except > 0) {
4352         int i;
4353         /* process except statements to create a try ... except */
4354         handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4355         if (handlers == NULL)
4356             return NULL;
4357 
4358         for (i = 0; i < n_except; i++) {
4359             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4360                                                        CHILD(n, 5 + i * 3));
4361             if (!e)
4362                 return NULL;
4363             asdl_seq_SET(handlers, i, e);
4364         }
4365     }
4366 
4367     assert(finally != NULL || asdl_seq_LEN(handlers));
4368         if (finally != NULL) {
4369         // finally is always last
4370         get_last_end_pos(finally, &end_lineno, &end_col_offset);
4371     } else if (orelse != NULL) {
4372         // otherwise else is last
4373         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4374     } else {
4375         // inline the get_last_end_pos logic due to layout mismatch
4376         last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4377         end_lineno = last_handler->end_lineno;
4378         end_col_offset = last_handler->end_col_offset;
4379     }
4380     return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4381                end_lineno, end_col_offset, c->c_arena);
4382 }
4383 
4384 /* with_item: test ['as' expr] */
4385 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4386 ast_for_with_item(struct compiling *c, const node *n)
4387 {
4388     expr_ty context_expr, optional_vars = NULL;
4389 
4390     REQ(n, with_item);
4391     context_expr = ast_for_expr(c, CHILD(n, 0));
4392     if (!context_expr)
4393         return NULL;
4394     if (NCH(n) == 3) {
4395         optional_vars = ast_for_expr(c, CHILD(n, 2));
4396 
4397         if (!optional_vars) {
4398             return NULL;
4399         }
4400         if (!set_context(c, optional_vars, Store, n)) {
4401             return NULL;
4402         }
4403     }
4404 
4405     return withitem(context_expr, optional_vars, c->c_arena);
4406 }
4407 
4408 /* with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite */
4409 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4410 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4411 {
4412     const node * const n = is_async ? CHILD(n0, 1) : n0;
4413     int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4414     asdl_seq *items, *body;
4415     string type_comment;
4416 
4417     if (is_async && c->c_feature_version < 5) {
4418         ast_error(c, n,
4419                   "Async with statements are only supported in Python 3.5 and greater");
4420         return NULL;
4421     }
4422 
4423     REQ(n, with_stmt);
4424 
4425     has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4426     nch_minus_type = NCH(n) - has_type_comment;
4427 
4428     n_items = (nch_minus_type - 2) / 2;
4429     items = _Py_asdl_seq_new(n_items, c->c_arena);
4430     if (!items)
4431         return NULL;
4432     for (i = 1; i < nch_minus_type - 2; i += 2) {
4433         withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4434         if (!item)
4435             return NULL;
4436         asdl_seq_SET(items, (i - 1) / 2, item);
4437     }
4438 
4439     body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4440     if (!body)
4441         return NULL;
4442     get_last_end_pos(body, &end_lineno, &end_col_offset);
4443 
4444     if (has_type_comment) {
4445         type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4446         if (!type_comment)
4447             return NULL;
4448     }
4449     else
4450         type_comment = NULL;
4451 
4452     if (is_async)
4453         return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4454                          end_lineno, end_col_offset, c->c_arena);
4455     else
4456         return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4457                     end_lineno, end_col_offset, c->c_arena);
4458 }
4459 
4460 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4461 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4462 {
4463     /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4464     PyObject *classname;
4465     asdl_seq *s;
4466     expr_ty call;
4467     int end_lineno, end_col_offset;
4468 
4469     REQ(n, classdef);
4470 
4471     if (NCH(n) == 4) { /* class NAME ':' suite */
4472         s = ast_for_suite(c, CHILD(n, 3));
4473         if (!s)
4474             return NULL;
4475         get_last_end_pos(s, &end_lineno, &end_col_offset);
4476 
4477         classname = NEW_IDENTIFIER(CHILD(n, 1));
4478         if (!classname)
4479             return NULL;
4480         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4481             return NULL;
4482         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4483                         LINENO(n), n->n_col_offset,
4484                         end_lineno, end_col_offset, c->c_arena);
4485     }
4486 
4487     if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4488         s = ast_for_suite(c, CHILD(n, 5));
4489         if (!s)
4490             return NULL;
4491         get_last_end_pos(s, &end_lineno, &end_col_offset);
4492 
4493         classname = NEW_IDENTIFIER(CHILD(n, 1));
4494         if (!classname)
4495             return NULL;
4496         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4497             return NULL;
4498         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4499                         LINENO(n), n->n_col_offset,
4500                         end_lineno, end_col_offset, c->c_arena);
4501     }
4502 
4503     /* class NAME '(' arglist ')' ':' suite */
4504     /* build up a fake Call node so we can extract its pieces */
4505     {
4506         PyObject *dummy_name;
4507         expr_ty dummy;
4508         dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4509         if (!dummy_name)
4510             return NULL;
4511         dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4512                      CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4513                      c->c_arena);
4514         call = ast_for_call(c, CHILD(n, 3), dummy,
4515                             CHILD(n, 1), NULL, CHILD(n, 4));
4516         if (!call)
4517             return NULL;
4518     }
4519     s = ast_for_suite(c, CHILD(n, 6));
4520     if (!s)
4521         return NULL;
4522     get_last_end_pos(s, &end_lineno, &end_col_offset);
4523 
4524     classname = NEW_IDENTIFIER(CHILD(n, 1));
4525     if (!classname)
4526         return NULL;
4527     if (forbidden_name(c, classname, CHILD(n, 1), 0))
4528         return NULL;
4529 
4530     return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4531                     decorator_seq, LINENO(n), n->n_col_offset,
4532                     end_lineno, end_col_offset, c->c_arena);
4533 }
4534 
4535 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4536 ast_for_stmt(struct compiling *c, const node *n)
4537 {
4538     if (TYPE(n) == stmt) {
4539         assert(NCH(n) == 1);
4540         n = CHILD(n, 0);
4541     }
4542     if (TYPE(n) == simple_stmt) {
4543         assert(num_stmts(n) == 1);
4544         n = CHILD(n, 0);
4545     }
4546     if (TYPE(n) == small_stmt) {
4547         n = CHILD(n, 0);
4548         /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4549                   | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4550         */
4551         switch (TYPE(n)) {
4552             case expr_stmt:
4553                 return ast_for_expr_stmt(c, n);
4554             case del_stmt:
4555                 return ast_for_del_stmt(c, n);
4556             case pass_stmt:
4557                 return Pass(LINENO(n), n->n_col_offset,
4558                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4559             case flow_stmt:
4560                 return ast_for_flow_stmt(c, n);
4561             case import_stmt:
4562                 return ast_for_import_stmt(c, n);
4563             case global_stmt:
4564                 return ast_for_global_stmt(c, n);
4565             case nonlocal_stmt:
4566                 return ast_for_nonlocal_stmt(c, n);
4567             case assert_stmt:
4568                 return ast_for_assert_stmt(c, n);
4569             default:
4570                 PyErr_Format(PyExc_SystemError,
4571                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4572                              TYPE(n), NCH(n));
4573                 return NULL;
4574         }
4575     }
4576     else {
4577         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4578                         | funcdef | classdef | decorated | async_stmt
4579         */
4580         node *ch = CHILD(n, 0);
4581         REQ(n, compound_stmt);
4582         switch (TYPE(ch)) {
4583             case if_stmt:
4584                 return ast_for_if_stmt(c, ch);
4585             case while_stmt:
4586                 return ast_for_while_stmt(c, ch);
4587             case for_stmt:
4588                 return ast_for_for_stmt(c, ch, 0);
4589             case try_stmt:
4590                 return ast_for_try_stmt(c, ch);
4591             case with_stmt:
4592                 return ast_for_with_stmt(c, ch, 0);
4593             case funcdef:
4594                 return ast_for_funcdef(c, ch, NULL);
4595             case classdef:
4596                 return ast_for_classdef(c, ch, NULL);
4597             case decorated:
4598                 return ast_for_decorated(c, ch);
4599             case async_stmt:
4600                 return ast_for_async_stmt(c, ch);
4601             default:
4602                 PyErr_Format(PyExc_SystemError,
4603                              "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4604                              TYPE(n), NCH(n));
4605                 return NULL;
4606         }
4607     }
4608 }
4609 
4610 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4611 parsenumber_raw(struct compiling *c, const char *s)
4612 {
4613     const char *end;
4614     long x;
4615     double dx;
4616     Py_complex compl;
4617     int imflag;
4618 
4619     assert(s != NULL);
4620     errno = 0;
4621     end = s + strlen(s) - 1;
4622     imflag = *end == 'j' || *end == 'J';
4623     if (s[0] == '0') {
4624         x = (long) PyOS_strtoul(s, (char **)&end, 0);
4625         if (x < 0 && errno == 0) {
4626             return PyLong_FromString(s, (char **)0, 0);
4627         }
4628     }
4629     else
4630         x = PyOS_strtol(s, (char **)&end, 0);
4631     if (*end == '\0') {
4632         if (errno != 0)
4633             return PyLong_FromString(s, (char **)0, 0);
4634         return PyLong_FromLong(x);
4635     }
4636     /* XXX Huge floats may silently fail */
4637     if (imflag) {
4638         compl.real = 0.;
4639         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4640         if (compl.imag == -1.0 && PyErr_Occurred())
4641             return NULL;
4642         return PyComplex_FromCComplex(compl);
4643     }
4644     else
4645     {
4646         dx = PyOS_string_to_double(s, NULL, NULL);
4647         if (dx == -1.0 && PyErr_Occurred())
4648             return NULL;
4649         return PyFloat_FromDouble(dx);
4650     }
4651 }
4652 
4653 static PyObject *
parsenumber(struct compiling * c,const char * s)4654 parsenumber(struct compiling *c, const char *s)
4655 {
4656     char *dup, *end;
4657     PyObject *res = NULL;
4658 
4659     assert(s != NULL);
4660 
4661     if (strchr(s, '_') == NULL) {
4662         return parsenumber_raw(c, s);
4663     }
4664     /* Create a duplicate without underscores. */
4665     dup = PyMem_Malloc(strlen(s) + 1);
4666     if (dup == NULL) {
4667         return PyErr_NoMemory();
4668     }
4669     end = dup;
4670     for (; *s; s++) {
4671         if (*s != '_') {
4672             *end++ = *s;
4673         }
4674     }
4675     *end = '\0';
4676     res = parsenumber_raw(c, dup);
4677     PyMem_Free(dup);
4678     return res;
4679 }
4680 
4681 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4682 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4683 {
4684     const char *s, *t;
4685     t = s = *sPtr;
4686     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4687     while (s < end && (*s & 0x80)) s++;
4688     *sPtr = s;
4689     return PyUnicode_DecodeUTF8(t, s - t, NULL);
4690 }
4691 
4692 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4693 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4694                              unsigned char first_invalid_escape_char)
4695 {
4696     PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4697                                          first_invalid_escape_char);
4698     if (msg == NULL) {
4699         return -1;
4700     }
4701     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4702                                    c->c_filename, LINENO(n),
4703                                    NULL, NULL) < 0)
4704     {
4705         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4706             /* Replace the DeprecationWarning exception with a SyntaxError
4707                to get a more accurate error report */
4708             PyErr_Clear();
4709             ast_error(c, n, "%U", msg);
4710         }
4711         Py_DECREF(msg);
4712         return -1;
4713     }
4714     Py_DECREF(msg);
4715     return 0;
4716 }
4717 
4718 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4719 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4720                             size_t len)
4721 {
4722     PyObject *v, *u;
4723     char *buf;
4724     char *p;
4725     const char *end;
4726 
4727     /* check for integer overflow */
4728     if (len > SIZE_MAX / 6)
4729         return NULL;
4730     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4731        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4732     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4733     if (u == NULL)
4734         return NULL;
4735     p = buf = PyBytes_AsString(u);
4736     end = s + len;
4737     while (s < end) {
4738         if (*s == '\\') {
4739             *p++ = *s++;
4740             if (s >= end || *s & 0x80) {
4741                 strcpy(p, "u005c");
4742                 p += 5;
4743                 if (s >= end)
4744                     break;
4745             }
4746         }
4747         if (*s & 0x80) { /* XXX inefficient */
4748             PyObject *w;
4749             int kind;
4750             void *data;
4751             Py_ssize_t len, i;
4752             w = decode_utf8(c, &s, end);
4753             if (w == NULL) {
4754                 Py_DECREF(u);
4755                 return NULL;
4756             }
4757             kind = PyUnicode_KIND(w);
4758             data = PyUnicode_DATA(w);
4759             len = PyUnicode_GET_LENGTH(w);
4760             for (i = 0; i < len; i++) {
4761                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4762                 sprintf(p, "\\U%08x", chr);
4763                 p += 10;
4764             }
4765             /* Should be impossible to overflow */
4766             assert(p - buf <= PyBytes_GET_SIZE(u));
4767             Py_DECREF(w);
4768         } else {
4769             *p++ = *s++;
4770         }
4771     }
4772     len = p - buf;
4773     s = buf;
4774 
4775     const char *first_invalid_escape;
4776     v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4777 
4778     if (v != NULL && first_invalid_escape != NULL) {
4779         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4780             /* We have not decref u before because first_invalid_escape points
4781                inside u. */
4782             Py_XDECREF(u);
4783             Py_DECREF(v);
4784             return NULL;
4785         }
4786     }
4787     Py_XDECREF(u);
4788     return v;
4789 }
4790 
4791 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4792 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4793                           size_t len)
4794 {
4795     const char *first_invalid_escape;
4796     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4797                                              &first_invalid_escape);
4798     if (result == NULL)
4799         return NULL;
4800 
4801     if (first_invalid_escape != NULL) {
4802         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4803             Py_DECREF(result);
4804             return NULL;
4805         }
4806     }
4807     return result;
4808 }
4809 
4810 /* Shift locations for the given node and all its children by adding `lineno`
4811    and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4812 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4813 {
4814     n->n_col_offset = n->n_col_offset + col_offset;
4815     n->n_end_col_offset = n->n_end_col_offset + col_offset;
4816     for (int i = 0; i < NCH(n); ++i) {
4817         if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4818             /* Shifting column offsets unnecessary if there's been newlines. */
4819             col_offset = 0;
4820         }
4821         fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4822     }
4823     n->n_lineno = n->n_lineno + lineno;
4824     n->n_end_lineno = n->n_end_lineno + lineno;
4825 }
4826 
4827 /* Fix locations for the given node and its children.
4828 
4829    `parent` is the enclosing node.
4830    `n` is the node which locations are going to be fixed relative to parent.
4831    `expr_str` is the child node's string representation, including braces.
4832 */
4833 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4834 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4835 {
4836     char *substr = NULL;
4837     char *start;
4838     int lines = LINENO(parent) - 1;
4839     int cols = parent->n_col_offset;
4840     /* Find the full fstring to fix location information in `n`. */
4841     while (parent && parent->n_type != STRING)
4842         parent = parent->n_child;
4843     if (parent && parent->n_str) {
4844         substr = strstr(parent->n_str, expr_str);
4845         if (substr) {
4846             start = substr;
4847             while (start > parent->n_str) {
4848                 if (start[0] == '\n')
4849                     break;
4850                 start--;
4851             }
4852             cols += (int)(substr - start);
4853             /* adjust the start based on the number of newlines encountered
4854                before the f-string expression */
4855             for (char* p = parent->n_str; p < substr; p++) {
4856                 if (*p == '\n') {
4857                     lines++;
4858                 }
4859             }
4860         }
4861     }
4862     fstring_shift_node_locations(n, lines, cols);
4863 }
4864 
4865 /* Compile this expression in to an expr_ty.  Add parens around the
4866    expression, in order to allow leading spaces in the expression. */
4867 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4868 fstring_compile_expr(const char *expr_start, const char *expr_end,
4869                      struct compiling *c, const node *n)
4870 
4871 {
4872     node *mod_n;
4873     mod_ty mod;
4874     char *str;
4875     Py_ssize_t len;
4876     const char *s;
4877 
4878     assert(expr_end >= expr_start);
4879     assert(*(expr_start-1) == '{');
4880     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4881            *expr_end == '=');
4882 
4883     /* If the substring is all whitespace, it's an error.  We need to catch this
4884        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4885        because turning the expression '' in to '()' would go from being invalid
4886        to valid. */
4887     for (s = expr_start; s != expr_end; s++) {
4888         char c = *s;
4889         /* The Python parser ignores only the following whitespace
4890            characters (\r already is converted to \n). */
4891         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4892             break;
4893         }
4894     }
4895     if (s == expr_end) {
4896         ast_error(c, n, "f-string: empty expression not allowed");
4897         return NULL;
4898     }
4899 
4900     len = expr_end - expr_start;
4901     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4902     str = PyMem_Malloc(len + 3);
4903     if (str == NULL) {
4904         PyErr_NoMemory();
4905         return NULL;
4906     }
4907 
4908     str[0] = '(';
4909     memcpy(str+1, expr_start, len);
4910     str[len+1] = ')';
4911     str[len+2] = 0;
4912 
4913     PyCompilerFlags cf = _PyCompilerFlags_INIT;
4914     cf.cf_flags = PyCF_ONLY_AST;
4915     mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4916                                                     Py_eval_input, 0);
4917     if (!mod_n) {
4918         PyMem_Free(str);
4919         return NULL;
4920     }
4921     /* Reuse str to find the correct column offset. */
4922     str[0] = '{';
4923     str[len+1] = '}';
4924     fstring_fix_node_location(n, mod_n, str);
4925     mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4926     PyMem_Free(str);
4927     PyNode_Free(mod_n);
4928     if (!mod)
4929         return NULL;
4930     return mod->v.Expression.body;
4931 }
4932 
4933 /* Return -1 on error.
4934 
4935    Return 0 if we reached the end of the literal.
4936 
4937    Return 1 if we haven't reached the end of the literal, but we want
4938    the caller to process the literal up to this point. Used for
4939    doubled braces.
4940 */
4941 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4942 fstring_find_literal(const char **str, const char *end, int raw,
4943                      PyObject **literal, int recurse_lvl,
4944                      struct compiling *c, const node *n)
4945 {
4946     /* Get any literal string. It ends when we hit an un-doubled left
4947        brace (which isn't part of a unicode name escape such as
4948        "\N{EULER CONSTANT}"), or the end of the string. */
4949 
4950     const char *s = *str;
4951     const char *literal_start = s;
4952     int result = 0;
4953 
4954     assert(*literal == NULL);
4955     while (s < end) {
4956         char ch = *s++;
4957         if (!raw && ch == '\\' && s < end) {
4958             ch = *s++;
4959             if (ch == 'N') {
4960                 if (s < end && *s++ == '{') {
4961                     while (s < end && *s++ != '}') {
4962                     }
4963                     continue;
4964                 }
4965                 break;
4966             }
4967             if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4968                 return -1;
4969             }
4970         }
4971         if (ch == '{' || ch == '}') {
4972             /* Check for doubled braces, but only at the top level. If
4973                we checked at every level, then f'{0:{3}}' would fail
4974                with the two closing braces. */
4975             if (recurse_lvl == 0) {
4976                 if (s < end && *s == ch) {
4977                     /* We're going to tell the caller that the literal ends
4978                        here, but that they should continue scanning. But also
4979                        skip over the second brace when we resume scanning. */
4980                     *str = s + 1;
4981                     result = 1;
4982                     goto done;
4983                 }
4984 
4985                 /* Where a single '{' is the start of a new expression, a
4986                    single '}' is not allowed. */
4987                 if (ch == '}') {
4988                     *str = s - 1;
4989                     ast_error(c, n, "f-string: single '}' is not allowed");
4990                     return -1;
4991                 }
4992             }
4993             /* We're either at a '{', which means we're starting another
4994                expression; or a '}', which means we're at the end of this
4995                f-string (for a nested format_spec). */
4996             s--;
4997             break;
4998         }
4999     }
5000     *str = s;
5001     assert(s <= end);
5002     assert(s == end || *s == '{' || *s == '}');
5003 done:
5004     if (literal_start != s) {
5005         if (raw)
5006             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
5007                                                     s - literal_start,
5008                                                     NULL, NULL);
5009         else
5010             *literal = decode_unicode_with_escapes(c, n, literal_start,
5011                                                    s - literal_start);
5012         if (!*literal)
5013             return -1;
5014     }
5015     return result;
5016 }
5017 
5018 /* Forward declaration because parsing is recursive. */
5019 static expr_ty
5020 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5021               struct compiling *c, const node *n);
5022 
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{').  On success, *expression receives the
   FormattedValue node, which includes the expression, conversion character,
   and format_spec expression, and *str is advanced past the closing '}'.

   Note that this doesn't do a perfect job of validating the expression.
   It tracks quotes and bracket nesting (and reports unmatched or
   mismatched brackets), but it doesn't need to error on all invalid
   expressions, just correctly find the end of all valid ones. Any errors
   inside the expression will be caught when we parse it later.

   For an '=' "debug" expression, *expr_text is set to the debug text (the
   original text of the expression, including the '=' and any whitespace
   around it, as a string object).  If not a debug expression, *expr_text
   is set to NULL.

   raw is only passed through to fstring_parse() when a format spec is
   present; recurse_lvl is the current nesting level and is passed on
   incremented by one.

   Returns 0 on success, -1 on error.  On error any *expr_text created here
   has been decref'd (the pointer itself is not reset, so the caller must
   not use it). */
static int
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
                  PyObject **expr_text, expr_ty *expression,
                  struct compiling *c, const node *n)
{
    /* Return -1 on error, else 0. */

    const char *expr_start;
    const char *expr_end;
    expr_ty simple_expression;
    expr_ty format_spec = NULL; /* Optional format specifier. */
    int conversion = -1; /* The conversion char.  Use default if not
                            specified, or !r if using = and no format
                            spec. */

    /* 0 if we're not in a string, else the quote char we're trying to
       match (single or double quote). */
    char quote_char = 0;

    /* If we're inside a string, 1=normal, 3=triple-quoted. */
    int string_type = 0;

    /* Keep track of nesting level for braces/parens/brackets in
       expressions.  parenstack[0..nested_depth-1] holds the opening
       characters that are still unclosed. */
    Py_ssize_t nested_depth = 0;
    char parenstack[MAXLEVEL];

    *expr_text = NULL;

    /* Can only nest one level deep. */
    if (recurse_lvl >= 2) {
        ast_error(c, n, "f-string: expressions nested too deeply");
        goto error;
    }

    /* The first char must be a left brace, or we wouldn't have gotten
       here. Skip over it. */
    assert(**str == '{');
    *str += 1;

    expr_start = *str;
    for (; *str < end; (*str)++) {
        char ch;

        /* Loop invariants. */
        assert(nested_depth >= 0);
        assert(*str >= expr_start && *str < end);
        if (quote_char)
            assert(string_type == 1 || string_type == 3);
        else
            assert(string_type == 0);

        ch = **str;
        /* Nowhere inside an expression is a backslash allowed. */
        if (ch == '\\') {
            /* Error: can't include a backslash character, inside
               parens or strings or not. */
            ast_error(c, n,
                      "f-string expression part "
                      "cannot include a backslash");
            goto error;
        }
        if (quote_char) {
            /* We're inside a string. See if we're at the end. */
            /* This code needs to implement the same non-error logic
               as tok_get from tokenizer.c, at the letter_quote
               label. To actually share that code would be a
               nightmare. But, it's unlikely to change and is small,
               so duplicate it here. Note we don't need to catch all
               of the errors, since they'll be caught when parsing the
               expression. We just need to match the non-error
               cases. Thus we can ignore \n in single-quoted strings,
               for example. Or non-terminated strings. */
            if (ch == quote_char) {
                /* Does this match the string_type (single or triple
                   quoted)? */
                if (string_type == 3) {
                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                        /* We're at the end of a triple quoted string.
                           Skip two quotes here; the loop increment skips
                           the third. */
                        *str += 2;
                        string_type = 0;
                        quote_char = 0;
                        continue;
                    }
                } else {
                    /* We're at the end of a normal string. */
                    quote_char = 0;
                    string_type = 0;
                    continue;
                }
            }
        } else if (ch == '\'' || ch == '"') {
            /* Is this a triple quoted string? */
            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                string_type = 3;
                /* Skip two quotes here; the loop increment skips the
                   third. */
                *str += 2;
            } else {
                /* Start of a normal string. */
                string_type = 1;
            }
            /* Start looking for the end of the string. */
            quote_char = ch;
        } else if (ch == '[' || ch == '{' || ch == '(') {
            /* Opening bracket: remember it so the matching closer can be
               checked, refusing to overflow parenstack. */
            if (nested_depth >= MAXLEVEL) {
                ast_error(c, n, "f-string: too many nested parenthesis");
                goto error;
            }
            parenstack[nested_depth] = ch;
            nested_depth++;
        } else if (ch == '#') {
            /* Error: can't include a comment character, inside parens
               or not. */
            ast_error(c, n, "f-string expression part cannot include '#'");
            goto error;
        } else if (nested_depth == 0 &&
                   (ch == '!' || ch == ':' || ch == '}' ||
                    ch == '=' || ch == '>' || ch == '<')) {
            /* Outside of all brackets, these characters may terminate the
               expression, unless they are part of a comparison operator. */
            /* See if there's a next character. */
            if (*str+1 < end) {
                char next = *(*str+1);

                /* For "!=". since '=' is not an allowed conversion character,
                   nothing is lost in this test. */
                if ((ch == '!' && next == '=') ||   /* != */
                    (ch == '=' && next == '=') ||   /* == */
                    (ch == '<' && next == '=') ||   /* <= */
                    (ch == '>' && next == '=')      /* >= */
                    ) {
                    /* Skip the first char of the two-char operator; the
                       loop increment skips the second. */
                    *str += 1;
                    continue;
                }
                /* Don't get out of the loop for these, if they're single
                   chars (not part of 2-char tokens). If by themselves, they
                   don't end an expression (unlike say '!'). */
                if (ch == '>' || ch == '<') {
                    continue;
                }
            }

            /* Normal way out of this loop. */
            break;
        } else if (ch == ']' || ch == '}' || ch == ')') {
            /* Closing bracket: it must pair with the most recent unclosed
               opening bracket. */
            if (!nested_depth) {
                ast_error(c, n, "f-string: unmatched '%c'", ch);
                goto error;
            }
            nested_depth--;
            int opening = parenstack[nested_depth];
            if (!((opening == '(' && ch == ')') ||
                  (opening == '[' && ch == ']') ||
                  (opening == '{' && ch == '}')))
            {
                ast_error(c, n,
                          "f-string: closing parenthesis '%c' "
                          "does not match opening parenthesis '%c'",
                          ch, opening);
                goto error;
            }
        } else {
            /* Just consume this char and loop around. */
        }
    }
    expr_end = *str;
    /* If we leave this loop in a string or with mismatched parens, we
       don't care. We'll get a syntax error when compiling the
       expression. But, we can produce a better error message, so
       let's just do that.*/
    if (quote_char) {
        ast_error(c, n, "f-string: unterminated string");
        goto error;
    }
    if (nested_depth) {
        int opening = parenstack[nested_depth - 1];
        ast_error(c, n, "f-string: unmatched '%c'", opening);
        goto error;
    }

    /* Running off the end means we never saw a terminating character. */
    if (*str >= end)
        goto unexpected_end_of_string;

    /* Compile the expression as soon as possible, so we show errors
       related to the expression before errors related to the
       conversion or format_spec. */
    simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
    if (!simple_expression)
        goto error;

    /* Check for =, which puts the text value of the expression in
       expr_text. */
    if (**str == '=') {
        if (c->c_feature_version < 8) {
            ast_error(c, n,
                      "f-string: self documenting expressions are "
                      "only supported in Python 3.8 and greater");
            goto error;
        }
        *str += 1;

        /* Skip over ASCII whitespace.  No need to test for end of string
           here, since we know there's at least a trailing quote somewhere
           ahead. */
        while (Py_ISSPACE(**str)) {
            *str += 1;
        }

        /* Set *expr_text to the text of the expression (everything from
           the start of the expression up to the current position). */
        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
        if (!*expr_text) {
            goto error;
        }
    }

    /* Check for a conversion char, if present. */
    if (**str == '!') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        conversion = **str;
        *str += 1;

        /* Validate the conversion. */
        if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
            ast_error(c, n,
                      "f-string: invalid conversion character: "
                      "expected 's', 'r', or 'a'");
            goto error;
        }

    }

    /* Check for the format spec, if present. */
    if (*str >= end)
        goto unexpected_end_of_string;
    if (**str == ':') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        /* Parse the format spec.  It is itself parsed as an f-string, one
           nesting level deeper. */
        format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
        if (!format_spec)
            goto error;
    }

    if (*str >= end || **str != '}')
        goto unexpected_end_of_string;

    /* We're at a right brace. Consume it. */
    assert(*str < end);
    assert(**str == '}');
    *str += 1;

    /* If we're in = mode (detected by non-NULL expr_text), and have no format
       spec and no explicit conversion, set the conversion to 'r'. */
    if (*expr_text && format_spec == NULL && conversion == -1) {
        conversion = 'r';
    }

    /* And now create the FormattedValue node that represents this
       entire expression with the conversion and format spec. */
    *expression = FormattedValue(simple_expression, conversion,
                                 format_spec, LINENO(n),
                                 n->n_col_offset, n->n_end_lineno,
                                 n->n_end_col_offset, c->c_arena);
    if (!*expression)
        goto error;

    return 0;

unexpected_end_of_string:
    ast_error(c, n, "f-string: expecting '}'");
    /* Falls through to error. */

error:
    /* Drop the debug text, if one was created before the failure. */
    Py_XDECREF(*expr_text);
    return -1;

}
5317 
5318 /* Return -1 on error.
5319 
5320    Return 0 if we have a literal (possible zero length) and an
5321    expression (zero length if at the end of the string.
5322 
5323    Return 1 if we have a literal, but no expression, and we want the
5324    caller to call us again. This is used to deal with doubled
5325    braces.
5326 
5327    When called multiple times on the string 'a{{b{0}c', this function
5328    will return:
5329 
5330    1. the literal 'a{' with no expression, and a return value
5331       of 1. Despite the fact that there's no expression, the return
5332       value of 1 means we're not finished yet.
5333 
5334    2. the literal 'b' and the expression '0', with a return value of
5335       0. The fact that there's an expression means we're not finished.
5336 
5337    3. literal 'c' with no expression and a return value of 0. The
5338       combination of the return value of 0 with no expression means
5339       we're finished.
5340 */
5341 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5342 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5343                               int recurse_lvl, PyObject **literal,
5344                               PyObject **expr_text, expr_ty *expression,
5345                               struct compiling *c, const node *n)
5346 {
5347     int result;
5348 
5349     assert(*literal == NULL && *expression == NULL);
5350 
5351     /* Get any literal string. */
5352     result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5353     if (result < 0)
5354         goto error;
5355 
5356     assert(result == 0 || result == 1);
5357 
5358     if (result == 1)
5359         /* We have a literal, but don't look at the expression. */
5360         return 1;
5361 
5362     if (*str >= end || **str == '}')
5363         /* We're at the end of the string or the end of a nested
5364            f-string: no expression. The top-level error case where we
5365            expect to be at the end of the string but we're at a '}' is
5366            handled later. */
5367         return 0;
5368 
5369     /* We must now be the start of an expression, on a '{'. */
5370     assert(**str == '{');
5371 
5372     if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5373                           expression, c, n) < 0)
5374         goto error;
5375 
5376     return 0;
5377 
5378 error:
5379     Py_CLEAR(*literal);
5380     return -1;
5381 }
5382 
/* Number of expr_ty slots held inline in an ExprList ('data' below) before
   ExprList_Append() switches to dynamically allocated storage. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. ExprList_Dealloc() sets
                              this to -1 to mark the list as dead. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5401 
5402 #ifdef NDEBUG
5403 #define ExprList_check_invariants(l)
5404 #else
5405 static void
ExprList_check_invariants(ExprList * l)5406 ExprList_check_invariants(ExprList *l)
5407 {
5408     /* Check our invariants. Make sure this object is "live", and
5409        hasn't been deallocated. */
5410     assert(l->size >= 0);
5411     assert(l->p != NULL);
5412     if (l->size <= EXPRLIST_N_CACHED)
5413         assert(l->data == l->p);
5414 }
5415 #endif
5416 
5417 static void
ExprList_Init(ExprList * l)5418 ExprList_Init(ExprList *l)
5419 {
5420     l->allocated = EXPRLIST_N_CACHED;
5421     l->size = 0;
5422 
5423     /* Until we start allocating dynamically, p points to data. */
5424     l->p = l->data;
5425 
5426     ExprList_check_invariants(l);
5427 }
5428 
5429 static int
ExprList_Append(ExprList * l,expr_ty exp)5430 ExprList_Append(ExprList *l, expr_ty exp)
5431 {
5432     ExprList_check_invariants(l);
5433     if (l->size >= l->allocated) {
5434         /* We need to alloc (or realloc) the memory. */
5435         Py_ssize_t new_size = l->allocated * 2;
5436 
5437         /* See if we've ever allocated anything dynamically. */
5438         if (l->p == l->data) {
5439             Py_ssize_t i;
5440             /* We're still using the cached data. Switch to
5441                alloc-ing. */
5442             l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5443             if (!l->p)
5444                 return -1;
5445             /* Copy the cached data into the new buffer. */
5446             for (i = 0; i < l->size; i++)
5447                 l->p[i] = l->data[i];
5448         } else {
5449             /* Just realloc. */
5450             expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5451             if (!tmp) {
5452                 PyMem_Free(l->p);
5453                 l->p = NULL;
5454                 return -1;
5455             }
5456             l->p = tmp;
5457         }
5458 
5459         l->allocated = new_size;
5460         assert(l->allocated == 2 * l->size);
5461     }
5462 
5463     l->p[l->size++] = exp;
5464 
5465     ExprList_check_invariants(l);
5466     return 0;
5467 }
5468 
5469 static void
ExprList_Dealloc(ExprList * l)5470 ExprList_Dealloc(ExprList *l)
5471 {
5472     ExprList_check_invariants(l);
5473 
5474     /* If there's been an error, or we've never dynamically allocated,
5475        do nothing. */
5476     if (!l->p || l->p == l->data) {
5477         /* Do nothing. */
5478     } else {
5479         /* We have dynamically allocated. Free the memory. */
5480         PyMem_Free(l->p);
5481     }
5482     l->p = NULL;
5483     l->size = -1;
5484 }
5485 
5486 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5487 ExprList_Finish(ExprList *l, PyArena *arena)
5488 {
5489     asdl_seq *seq;
5490 
5491     ExprList_check_invariants(l);
5492 
5493     /* Allocate the asdl_seq and copy the expressions in to it. */
5494     seq = _Py_asdl_seq_new(l->size, arena);
5495     if (seq) {
5496         Py_ssize_t i;
5497         for (i = 0; i < l->size; i++)
5498             asdl_seq_SET(seq, i, l->p[i]);
5499     }
5500     ExprList_Dealloc(l);
5501     return seq;
5502 }
5503 
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty. */
typedef struct {
    PyObject *last_str;   /* Literal text accumulated so far that has not
                             yet been turned into a Constant node, or
                             NULL if there is none. */
    ExprList expr_list;   /* Nodes (Constant and FormattedValue) collected
                             so far, in order. */
    int fmode;            /* Set to 1 once an f-string has been added
                             (see FstringParser_ConcatFstring); 0 means
                             only plain strings so far. */
} FstringParser;
5512 
5513 #ifdef NDEBUG
5514 #define FstringParser_check_invariants(state)
5515 #else
5516 static void
FstringParser_check_invariants(FstringParser * state)5517 FstringParser_check_invariants(FstringParser *state)
5518 {
5519     if (state->last_str)
5520         assert(PyUnicode_CheckExact(state->last_str));
5521     ExprList_check_invariants(&state->expr_list);
5522 }
5523 #endif
5524 
5525 static void
FstringParser_Init(FstringParser * state)5526 FstringParser_Init(FstringParser *state)
5527 {
5528     state->last_str = NULL;
5529     state->fmode = 0;
5530     ExprList_Init(&state->expr_list);
5531     FstringParser_check_invariants(state);
5532 }
5533 
5534 static void
FstringParser_Dealloc(FstringParser * state)5535 FstringParser_Dealloc(FstringParser *state)
5536 {
5537     FstringParser_check_invariants(state);
5538 
5539     Py_XDECREF(state->last_str);
5540     ExprList_Dealloc(&state->expr_list);
5541 }
5542 
5543 /* Constants for the following */
5544 static PyObject *u_kind;
5545 
5546 /* Compute 'kind' field for string Constant (either 'u' or None) */
5547 static PyObject *
make_kind(struct compiling * c,const node * n)5548 make_kind(struct compiling *c, const node *n)
5549 {
5550     char *s = NULL;
5551     PyObject *kind = NULL;
5552 
5553     /* Find the first string literal, if any */
5554     while (TYPE(n) != STRING) {
5555         if (NCH(n) == 0)
5556             return NULL;
5557         n = CHILD(n, 0);
5558     }
5559     REQ(n, STRING);
5560 
5561     /* If it starts with 'u', return a PyUnicode "u" string */
5562     s = STR(n);
5563     if (s && *s == 'u') {
5564         if (!u_kind) {
5565             u_kind = PyUnicode_InternFromString("u");
5566             if (!u_kind)
5567                 return NULL;
5568         }
5569         kind = u_kind;
5570         if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5571             return NULL;
5572         }
5573         Py_INCREF(kind);
5574     }
5575     return kind;
5576 }
5577 
5578 /* Make a Constant node, but decref the PyUnicode object being added. */
5579 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5580 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5581 {
5582     PyObject *s = *str;
5583     PyObject *kind = NULL;
5584     *str = NULL;
5585     assert(PyUnicode_CheckExact(s));
5586     if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5587         Py_DECREF(s);
5588         return NULL;
5589     }
5590     kind = make_kind(c, n);
5591     if (kind == NULL && PyErr_Occurred())
5592         return NULL;
5593     return Constant(s, kind, LINENO(n), n->n_col_offset,
5594                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5595 }
5596 
5597 /* Add a non-f-string (that is, a regular literal string). str is
5598    decref'd. */
5599 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5600 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5601 {
5602     FstringParser_check_invariants(state);
5603 
5604     assert(PyUnicode_CheckExact(str));
5605 
5606     if (PyUnicode_GET_LENGTH(str) == 0) {
5607         Py_DECREF(str);
5608         return 0;
5609     }
5610 
5611     if (!state->last_str) {
5612         /* We didn't have a string before, so just remember this one. */
5613         state->last_str = str;
5614     } else {
5615         /* Concatenate this with the previous string. */
5616         PyUnicode_AppendAndDel(&state->last_str, str);
5617         if (!state->last_str)
5618             return -1;
5619     }
5620     FstringParser_check_invariants(state);
5621     return 0;
5622 }
5623 
5624 /* Parse an f-string. The f-string is in *str to end, with no
5625    'f' or quotes. */
5626 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5627 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5628                             const char *end, int raw, int recurse_lvl,
5629                             struct compiling *c, const node *n)
5630 {
5631     FstringParser_check_invariants(state);
5632     state->fmode = 1;
5633 
5634     /* Parse the f-string. */
5635     while (1) {
5636         PyObject *literal = NULL;
5637         PyObject *expr_text = NULL;
5638         expr_ty expression = NULL;
5639 
5640         /* If there's a zero length literal in front of the
5641            expression, literal will be NULL. If we're at the end of
5642            the f-string, expression will be NULL (unless result == 1,
5643            see below). */
5644         int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5645                                                    &literal, &expr_text,
5646                                                    &expression, c, n);
5647         if (result < 0)
5648             return -1;
5649 
5650         /* Add the literal, if any. */
5651         if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5652             Py_XDECREF(expr_text);
5653             return -1;
5654         }
5655         /* Add the expr_text, if any. */
5656         if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5657             return -1;
5658         }
5659 
5660         /* We've dealt with the literal and expr_text, their ownership has
5661            been transferred to the state object.  Don't look at them again. */
5662 
5663         /* See if we should just loop around to get the next literal
5664            and expression, while ignoring the expression this
5665            time. This is used for un-doubling braces, as an
5666            optimization. */
5667         if (result == 1)
5668             continue;
5669 
5670         if (!expression)
5671             /* We're done with this f-string. */
5672             break;
5673 
5674         /* We know we have an expression. Convert any existing string
5675            to a Constant node. */
5676         if (!state->last_str) {
5677             /* Do nothing. No previous literal. */
5678         } else {
5679             /* Convert the existing last_str literal to a Constant node. */
5680             expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5681             if (!str || ExprList_Append(&state->expr_list, str) < 0)
5682                 return -1;
5683         }
5684 
5685         if (ExprList_Append(&state->expr_list, expression) < 0)
5686             return -1;
5687     }
5688 
5689     /* If recurse_lvl is zero, then we must be at the end of the
5690        string. Otherwise, we must be at a right brace. */
5691 
5692     if (recurse_lvl == 0 && *str < end-1) {
5693         ast_error(c, n, "f-string: unexpected end of string");
5694         return -1;
5695     }
5696     if (recurse_lvl != 0 && **str != '}') {
5697         ast_error(c, n, "f-string: expecting '}'");
5698         return -1;
5699     }
5700 
5701     FstringParser_check_invariants(state);
5702     return 0;
5703 }
5704 
5705 /* Convert the partial state reflected in last_str and expr_list to an
5706    expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5707 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5708 FstringParser_Finish(FstringParser *state, struct compiling *c,
5709                      const node *n)
5710 {
5711     asdl_seq *seq;
5712 
5713     FstringParser_check_invariants(state);
5714 
5715     /* If we're just a constant string with no expressions, return
5716        that. */
5717     if (!state->fmode) {
5718         assert(!state->expr_list.size);
5719         if (!state->last_str) {
5720             /* Create a zero length string. */
5721             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5722             if (!state->last_str)
5723                 goto error;
5724         }
5725         return make_str_node_and_del(&state->last_str, c, n);
5726     }
5727 
5728     /* Create a Constant node out of last_str, if needed. It will be the
5729        last node in our expression list. */
5730     if (state->last_str) {
5731         expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5732         if (!str || ExprList_Append(&state->expr_list, str) < 0)
5733             goto error;
5734     }
5735     /* This has already been freed. */
5736     assert(state->last_str == NULL);
5737 
5738     seq = ExprList_Finish(&state->expr_list, c->c_arena);
5739     if (!seq)
5740         goto error;
5741 
5742     return JoinedStr(seq, LINENO(n), n->n_col_offset,
5743                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5744 
5745 error:
5746     FstringParser_Dealloc(state);
5747     return NULL;
5748 }
5749 
5750 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5751    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
5752    str to point past the parsed portion. */
5753 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5754 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5755               struct compiling *c, const node *n)
5756 {
5757     FstringParser state;
5758 
5759     FstringParser_Init(&state);
5760     if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5761                                     c, n) < 0) {
5762         FstringParser_Dealloc(&state);
5763         return NULL;
5764     }
5765 
5766     return FstringParser_Finish(&state, c, n);
5767 }
5768 
5769 /* n is a Python string literal, including the bracketing quote
5770    characters, and r, b, u, &/or f prefixes (if any), and embedded
5771    escape sequences (if any). parsestr parses it, and sets *result to
5772    decoded Python string object.  If the string is an f-string, set
5773    *fstr and *fstrlen to the unparsed string object.  Return 0 if no
5774    errors occurred.
5775 */
5776 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5777 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5778          PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5779 {
5780     size_t len;
5781     const char *s = STR(n);
5782     int quote = Py_CHARMASK(*s);
5783     int fmode = 0;
5784     *bytesmode = 0;
5785     *rawmode = 0;
5786     *result = NULL;
5787     *fstr = NULL;
5788     if (Py_ISALPHA(quote)) {
5789         while (!*bytesmode || !*rawmode) {
5790             if (quote == 'b' || quote == 'B') {
5791                 quote = *++s;
5792                 *bytesmode = 1;
5793             }
5794             else if (quote == 'u' || quote == 'U') {
5795                 quote = *++s;
5796             }
5797             else if (quote == 'r' || quote == 'R') {
5798                 quote = *++s;
5799                 *rawmode = 1;
5800             }
5801             else if (quote == 'f' || quote == 'F') {
5802                 quote = *++s;
5803                 fmode = 1;
5804             }
5805             else {
5806                 break;
5807             }
5808         }
5809     }
5810 
5811     /* fstrings are only allowed in Python 3.6 and greater */
5812     if (fmode && c->c_feature_version < 6) {
5813         ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5814         return -1;
5815     }
5816 
5817     if (fmode && *bytesmode) {
5818         PyErr_BadInternalCall();
5819         return -1;
5820     }
5821     if (quote != '\'' && quote != '\"') {
5822         PyErr_BadInternalCall();
5823         return -1;
5824     }
5825     /* Skip the leading quote char. */
5826     s++;
5827     len = strlen(s);
5828     if (len > INT_MAX) {
5829         PyErr_SetString(PyExc_OverflowError,
5830                         "string to parse is too long");
5831         return -1;
5832     }
5833     if (s[--len] != quote) {
5834         /* Last quote char must match the first. */
5835         PyErr_BadInternalCall();
5836         return -1;
5837     }
5838     if (len >= 4 && s[0] == quote && s[1] == quote) {
5839         /* A triple quoted string. We've already skipped one quote at
5840            the start and one at the end of the string. Now skip the
5841            two at the start. */
5842         s += 2;
5843         len -= 2;
5844         /* And check that the last two match. */
5845         if (s[--len] != quote || s[--len] != quote) {
5846             PyErr_BadInternalCall();
5847             return -1;
5848         }
5849     }
5850 
5851     if (fmode) {
5852         /* Just return the bytes. The caller will parse the resulting
5853            string. */
5854         *fstr = s;
5855         *fstrlen = len;
5856         return 0;
5857     }
5858 
5859     /* Not an f-string. */
5860     /* Avoid invoking escape decoding routines if possible. */
5861     *rawmode = *rawmode || strchr(s, '\\') == NULL;
5862     if (*bytesmode) {
5863         /* Disallow non-ASCII characters. */
5864         const char *ch;
5865         for (ch = s; *ch; ch++) {
5866             if (Py_CHARMASK(*ch) >= 0x80) {
5867                 ast_error(c, n,
5868                           "bytes can only contain ASCII "
5869                           "literal characters.");
5870                 return -1;
5871             }
5872         }
5873         if (*rawmode)
5874             *result = PyBytes_FromStringAndSize(s, len);
5875         else
5876             *result = decode_bytes_with_escapes(c, n, s, len);
5877     } else {
5878         if (*rawmode)
5879             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5880         else
5881             *result = decode_unicode_with_escapes(c, n, s, len);
5882     }
5883     return *result == NULL ? -1 : 0;
5884 }
5885 
5886 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5887    each STRING atom, and process it as needed. For bytes, just
5888    concatenate them together, and the result will be a Constant node. For
5889    normal strings and f-strings, concatenate them together. The result
5890    will be a Constant node if there were no f-strings; a FormattedValue
5891    node if there's just an f-string (with no leading or trailing
5892    literals), or a JoinedStr node if there are multiple f-strings or
5893    any literals involved. */
5894 static expr_ty
parsestrplus(struct compiling * c,const node * n)5895 parsestrplus(struct compiling *c, const node *n)
5896 {
5897     int bytesmode = 0;
5898     PyObject *bytes_str = NULL;
5899     int i;
5900 
5901     FstringParser state;
5902     FstringParser_Init(&state);
5903 
5904     for (i = 0; i < NCH(n); i++) {
5905         int this_bytesmode;
5906         int this_rawmode;
5907         PyObject *s;
5908         const char *fstr;
5909         Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
5910 
5911         REQ(CHILD(n, i), STRING);
5912         if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5913                      &fstr, &fstrlen) != 0)
5914             goto error;
5915 
5916         /* Check that we're not mixing bytes with unicode. */
5917         if (i != 0 && bytesmode != this_bytesmode) {
5918             ast_error(c, n, "cannot mix bytes and nonbytes literals");
5919             /* s is NULL if the current string part is an f-string. */
5920             Py_XDECREF(s);
5921             goto error;
5922         }
5923         bytesmode = this_bytesmode;
5924 
5925         if (fstr != NULL) {
5926             int result;
5927             assert(s == NULL && !bytesmode);
5928             /* This is an f-string. Parse and concatenate it. */
5929             result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5930                                                  this_rawmode, 0, c, n);
5931             if (result < 0)
5932                 goto error;
5933         } else {
5934             /* A string or byte string. */
5935             assert(s != NULL && fstr == NULL);
5936 
5937             assert(bytesmode ? PyBytes_CheckExact(s) :
5938                    PyUnicode_CheckExact(s));
5939 
5940             if (bytesmode) {
5941                 /* For bytes, concat as we go. */
5942                 if (i == 0) {
5943                     /* First time, just remember this value. */
5944                     bytes_str = s;
5945                 } else {
5946                     PyBytes_ConcatAndDel(&bytes_str, s);
5947                     if (!bytes_str)
5948                         goto error;
5949                 }
5950             } else {
5951                 /* This is a regular string. Concatenate it. */
5952                 if (FstringParser_ConcatAndDel(&state, s) < 0)
5953                     goto error;
5954             }
5955         }
5956     }
5957     if (bytesmode) {
5958         /* Just return the bytes object and we're done. */
5959         if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5960             goto error;
5961         return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5962                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5963     }
5964 
5965     /* We're not a bytes string, bytes_str should never have been set. */
5966     assert(bytes_str == NULL);
5967 
5968     return FstringParser_Finish(&state, c, n);
5969 
5970 error:
5971     Py_XDECREF(bytes_str);
5972     FstringParser_Dealloc(&state);
5973     return NULL;
5974 }
5975 
5976 PyObject *
_PyAST_GetDocString(asdl_seq * body)5977 _PyAST_GetDocString(asdl_seq *body)
5978 {
5979     if (!asdl_seq_LEN(body)) {
5980         return NULL;
5981     }
5982     stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5983     if (st->kind != Expr_kind) {
5984         return NULL;
5985     }
5986     expr_ty e = st->v.Expr.value;
5987     if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5988         return e->v.Constant.value;
5989     }
5990     return NULL;
5991 }
5992