1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12 
13 #include <assert.h>
14 #include <stdbool.h>
15 
16 #define MAXLEVEL 200    /* Max parentheses level */
17 
18 static int validate_stmts(asdl_seq *);
19 static int validate_exprs(asdl_seq *, expr_context_ty, int);
20 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 static int validate_stmt(stmt_ty);
22 static int validate_expr(expr_ty, expr_context_ty);
23 
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27     assert(PyUnicode_Check(name));
28     static const char * const forbidden[] = {
29         "None",
30         "True",
31         "False",
32         NULL
33     };
34     for (int i = 0; forbidden[i] != NULL; i++) {
35         if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36             PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37             return 0;
38         }
39     }
40     return 1;
41 }
42 
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46     Py_ssize_t i;
47     if (!asdl_seq_LEN(gens)) {
48         PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49         return 0;
50     }
51     for (i = 0; i < asdl_seq_LEN(gens); i++) {
52         comprehension_ty comp = asdl_seq_GET(gens, i);
53         if (!validate_expr(comp->target, Store) ||
54             !validate_expr(comp->iter, Load) ||
55             !validate_exprs(comp->ifs, Load, 0))
56             return 0;
57     }
58     return 1;
59 }
60 
61 static int
validate_keywords(asdl_seq * keywords)62 validate_keywords(asdl_seq *keywords)
63 {
64     Py_ssize_t i;
65     for (i = 0; i < asdl_seq_LEN(keywords); i++)
66         if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
67             return 0;
68     return 1;
69 }
70 
71 static int
validate_args(asdl_seq * args)72 validate_args(asdl_seq *args)
73 {
74     Py_ssize_t i;
75     for (i = 0; i < asdl_seq_LEN(args); i++) {
76         arg_ty arg = asdl_seq_GET(args, i);
77         if (arg->annotation && !validate_expr(arg->annotation, Load))
78             return 0;
79     }
80     return 1;
81 }
82 
83 static const char *
expr_context_name(expr_context_ty ctx)84 expr_context_name(expr_context_ty ctx)
85 {
86     switch (ctx) {
87     case Load:
88         return "Load";
89     case Store:
90         return "Store";
91     case Del:
92         return "Del";
93     default:
94         Py_UNREACHABLE();
95     }
96 }
97 
98 static int
validate_arguments(arguments_ty args)99 validate_arguments(arguments_ty args)
100 {
101     if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
102         return 0;
103     }
104     if (args->vararg && args->vararg->annotation
105         && !validate_expr(args->vararg->annotation, Load)) {
106             return 0;
107     }
108     if (!validate_args(args->kwonlyargs))
109         return 0;
110     if (args->kwarg && args->kwarg->annotation
111         && !validate_expr(args->kwarg->annotation, Load)) {
112             return 0;
113     }
114     if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
115         PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
116         return 0;
117     }
118     if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
119         PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
120                         "kw_defaults on arguments");
121         return 0;
122     }
123     return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
124 }
125 
126 static int
validate_constant(PyObject * value)127 validate_constant(PyObject *value)
128 {
129     if (value == Py_None || value == Py_Ellipsis)
130         return 1;
131 
132     if (PyLong_CheckExact(value)
133             || PyFloat_CheckExact(value)
134             || PyComplex_CheckExact(value)
135             || PyBool_Check(value)
136             || PyUnicode_CheckExact(value)
137             || PyBytes_CheckExact(value))
138         return 1;
139 
140     if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
141         PyObject *it;
142 
143         it = PyObject_GetIter(value);
144         if (it == NULL)
145             return 0;
146 
147         while (1) {
148             PyObject *item = PyIter_Next(it);
149             if (item == NULL) {
150                 if (PyErr_Occurred()) {
151                     Py_DECREF(it);
152                     return 0;
153                 }
154                 break;
155             }
156 
157             if (!validate_constant(item)) {
158                 Py_DECREF(it);
159                 Py_DECREF(item);
160                 return 0;
161             }
162             Py_DECREF(item);
163         }
164 
165         Py_DECREF(it);
166         return 1;
167     }
168 
169     if (!PyErr_Occurred()) {
170         PyErr_Format(PyExc_TypeError,
171                      "got an invalid type in Constant: %s",
172                      _PyType_Name(Py_TYPE(value)));
173     }
174     return 0;
175 }
176 
177 static int
validate_expr(expr_ty exp,expr_context_ty ctx)178 validate_expr(expr_ty exp, expr_context_ty ctx)
179 {
180     int check_ctx = 1;
181     expr_context_ty actual_ctx;
182 
183     /* First check expression context. */
184     switch (exp->kind) {
185     case Attribute_kind:
186         actual_ctx = exp->v.Attribute.ctx;
187         break;
188     case Subscript_kind:
189         actual_ctx = exp->v.Subscript.ctx;
190         break;
191     case Starred_kind:
192         actual_ctx = exp->v.Starred.ctx;
193         break;
194     case Name_kind:
195         if (!validate_name(exp->v.Name.id)) {
196             return 0;
197         }
198         actual_ctx = exp->v.Name.ctx;
199         break;
200     case List_kind:
201         actual_ctx = exp->v.List.ctx;
202         break;
203     case Tuple_kind:
204         actual_ctx = exp->v.Tuple.ctx;
205         break;
206     default:
207         if (ctx != Load) {
208             PyErr_Format(PyExc_ValueError, "expression which can't be "
209                          "assigned to in %s context", expr_context_name(ctx));
210             return 0;
211         }
212         check_ctx = 0;
213         /* set actual_ctx to prevent gcc warning */
214         actual_ctx = 0;
215     }
216     if (check_ctx && actual_ctx != ctx) {
217         PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
218                      expr_context_name(ctx), expr_context_name(actual_ctx));
219         return 0;
220     }
221 
222     /* Now validate expression. */
223     switch (exp->kind) {
224     case BoolOp_kind:
225         if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
226             PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
227             return 0;
228         }
229         return validate_exprs(exp->v.BoolOp.values, Load, 0);
230     case BinOp_kind:
231         return validate_expr(exp->v.BinOp.left, Load) &&
232             validate_expr(exp->v.BinOp.right, Load);
233     case UnaryOp_kind:
234         return validate_expr(exp->v.UnaryOp.operand, Load);
235     case Lambda_kind:
236         return validate_arguments(exp->v.Lambda.args) &&
237             validate_expr(exp->v.Lambda.body, Load);
238     case IfExp_kind:
239         return validate_expr(exp->v.IfExp.test, Load) &&
240             validate_expr(exp->v.IfExp.body, Load) &&
241             validate_expr(exp->v.IfExp.orelse, Load);
242     case Dict_kind:
243         if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
244             PyErr_SetString(PyExc_ValueError,
245                             "Dict doesn't have the same number of keys as values");
246             return 0;
247         }
248         /* null_ok=1 for keys expressions to allow dict unpacking to work in
249            dict literals, i.e. ``{**{a:b}}`` */
250         return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
251             validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
252     case Set_kind:
253         return validate_exprs(exp->v.Set.elts, Load, 0);
254 #define COMP(NAME) \
255         case NAME ## _kind: \
256             return validate_comprehension(exp->v.NAME.generators) && \
257                 validate_expr(exp->v.NAME.elt, Load);
258     COMP(ListComp)
259     COMP(SetComp)
260     COMP(GeneratorExp)
261 #undef COMP
262     case DictComp_kind:
263         return validate_comprehension(exp->v.DictComp.generators) &&
264             validate_expr(exp->v.DictComp.key, Load) &&
265             validate_expr(exp->v.DictComp.value, Load);
266     case Yield_kind:
267         return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
268     case YieldFrom_kind:
269         return validate_expr(exp->v.YieldFrom.value, Load);
270     case Await_kind:
271         return validate_expr(exp->v.Await.value, Load);
272     case Compare_kind:
273         if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
274             PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
275             return 0;
276         }
277         if (asdl_seq_LEN(exp->v.Compare.comparators) !=
278             asdl_seq_LEN(exp->v.Compare.ops)) {
279             PyErr_SetString(PyExc_ValueError, "Compare has a different number "
280                             "of comparators and operands");
281             return 0;
282         }
283         return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
284             validate_expr(exp->v.Compare.left, Load);
285     case Call_kind:
286         return validate_expr(exp->v.Call.func, Load) &&
287             validate_exprs(exp->v.Call.args, Load, 0) &&
288             validate_keywords(exp->v.Call.keywords);
289     case Constant_kind:
290         if (!validate_constant(exp->v.Constant.value)) {
291             return 0;
292         }
293         return 1;
294     case JoinedStr_kind:
295         return validate_exprs(exp->v.JoinedStr.values, Load, 0);
296     case FormattedValue_kind:
297         if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
298             return 0;
299         if (exp->v.FormattedValue.format_spec)
300             return validate_expr(exp->v.FormattedValue.format_spec, Load);
301         return 1;
302     case Attribute_kind:
303         return validate_expr(exp->v.Attribute.value, Load);
304     case Subscript_kind:
305         return validate_expr(exp->v.Subscript.slice, Load) &&
306             validate_expr(exp->v.Subscript.value, Load);
307     case Starred_kind:
308         return validate_expr(exp->v.Starred.value, ctx);
309     case Slice_kind:
310         return (!exp->v.Slice.lower || validate_expr(exp->v.Slice.lower, Load)) &&
311             (!exp->v.Slice.upper || validate_expr(exp->v.Slice.upper, Load)) &&
312             (!exp->v.Slice.step || validate_expr(exp->v.Slice.step, Load));
313     case List_kind:
314         return validate_exprs(exp->v.List.elts, ctx, 0);
315     case Tuple_kind:
316         return validate_exprs(exp->v.Tuple.elts, ctx, 0);
317     case NamedExpr_kind:
318         return validate_expr(exp->v.NamedExpr.value, Load);
319     /* This last case doesn't have any checking. */
320     case Name_kind:
321         return 1;
322     }
323     PyErr_SetString(PyExc_SystemError, "unexpected expression");
324     return 0;
325 }
326 
327 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)328 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
329 {
330     if (asdl_seq_LEN(seq))
331         return 1;
332     PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
333     return 0;
334 }
335 
336 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)337 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
338 {
339     return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
340         validate_exprs(targets, ctx, 0);
341 }
342 
343 static int
validate_body(asdl_seq * body,const char * owner)344 validate_body(asdl_seq *body, const char *owner)
345 {
346     return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
347 }
348 
349 static int
validate_stmt(stmt_ty stmt)350 validate_stmt(stmt_ty stmt)
351 {
352     Py_ssize_t i;
353     switch (stmt->kind) {
354     case FunctionDef_kind:
355         return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
356             validate_arguments(stmt->v.FunctionDef.args) &&
357             validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
358             (!stmt->v.FunctionDef.returns ||
359              validate_expr(stmt->v.FunctionDef.returns, Load));
360     case ClassDef_kind:
361         return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
362             validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
363             validate_keywords(stmt->v.ClassDef.keywords) &&
364             validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
365     case Return_kind:
366         return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
367     case Delete_kind:
368         return validate_assignlist(stmt->v.Delete.targets, Del);
369     case Assign_kind:
370         return validate_assignlist(stmt->v.Assign.targets, Store) &&
371             validate_expr(stmt->v.Assign.value, Load);
372     case AugAssign_kind:
373         return validate_expr(stmt->v.AugAssign.target, Store) &&
374             validate_expr(stmt->v.AugAssign.value, Load);
375     case AnnAssign_kind:
376         if (stmt->v.AnnAssign.target->kind != Name_kind &&
377             stmt->v.AnnAssign.simple) {
378             PyErr_SetString(PyExc_TypeError,
379                             "AnnAssign with simple non-Name target");
380             return 0;
381         }
382         return validate_expr(stmt->v.AnnAssign.target, Store) &&
383                (!stmt->v.AnnAssign.value ||
384                 validate_expr(stmt->v.AnnAssign.value, Load)) &&
385                validate_expr(stmt->v.AnnAssign.annotation, Load);
386     case For_kind:
387         return validate_expr(stmt->v.For.target, Store) &&
388             validate_expr(stmt->v.For.iter, Load) &&
389             validate_body(stmt->v.For.body, "For") &&
390             validate_stmts(stmt->v.For.orelse);
391     case AsyncFor_kind:
392         return validate_expr(stmt->v.AsyncFor.target, Store) &&
393             validate_expr(stmt->v.AsyncFor.iter, Load) &&
394             validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
395             validate_stmts(stmt->v.AsyncFor.orelse);
396     case While_kind:
397         return validate_expr(stmt->v.While.test, Load) &&
398             validate_body(stmt->v.While.body, "While") &&
399             validate_stmts(stmt->v.While.orelse);
400     case If_kind:
401         return validate_expr(stmt->v.If.test, Load) &&
402             validate_body(stmt->v.If.body, "If") &&
403             validate_stmts(stmt->v.If.orelse);
404     case With_kind:
405         if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
406             return 0;
407         for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
408             withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
409             if (!validate_expr(item->context_expr, Load) ||
410                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
411                 return 0;
412         }
413         return validate_body(stmt->v.With.body, "With");
414     case AsyncWith_kind:
415         if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
416             return 0;
417         for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
418             withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
419             if (!validate_expr(item->context_expr, Load) ||
420                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
421                 return 0;
422         }
423         return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
424     case Raise_kind:
425         if (stmt->v.Raise.exc) {
426             return validate_expr(stmt->v.Raise.exc, Load) &&
427                 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
428         }
429         if (stmt->v.Raise.cause) {
430             PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
431             return 0;
432         }
433         return 1;
434     case Try_kind:
435         if (!validate_body(stmt->v.Try.body, "Try"))
436             return 0;
437         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
438             !asdl_seq_LEN(stmt->v.Try.finalbody)) {
439             PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
440             return 0;
441         }
442         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
443             asdl_seq_LEN(stmt->v.Try.orelse)) {
444             PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
445             return 0;
446         }
447         for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
448             excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
449             if ((handler->v.ExceptHandler.type &&
450                  !validate_expr(handler->v.ExceptHandler.type, Load)) ||
451                 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
452                 return 0;
453         }
454         return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
455                 validate_stmts(stmt->v.Try.finalbody)) &&
456             (!asdl_seq_LEN(stmt->v.Try.orelse) ||
457              validate_stmts(stmt->v.Try.orelse));
458     case Assert_kind:
459         return validate_expr(stmt->v.Assert.test, Load) &&
460             (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
461     case Import_kind:
462         return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
463     case ImportFrom_kind:
464         if (stmt->v.ImportFrom.level < 0) {
465             PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
466             return 0;
467         }
468         return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
469     case Global_kind:
470         return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
471     case Nonlocal_kind:
472         return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
473     case Expr_kind:
474         return validate_expr(stmt->v.Expr.value, Load);
475     case AsyncFunctionDef_kind:
476         return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
477             validate_arguments(stmt->v.AsyncFunctionDef.args) &&
478             validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
479             (!stmt->v.AsyncFunctionDef.returns ||
480              validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
481     case Pass_kind:
482     case Break_kind:
483     case Continue_kind:
484         return 1;
485     default:
486         PyErr_SetString(PyExc_SystemError, "unexpected statement");
487         return 0;
488     }
489 }
490 
491 static int
validate_stmts(asdl_seq * seq)492 validate_stmts(asdl_seq *seq)
493 {
494     Py_ssize_t i;
495     for (i = 0; i < asdl_seq_LEN(seq); i++) {
496         stmt_ty stmt = asdl_seq_GET(seq, i);
497         if (stmt) {
498             if (!validate_stmt(stmt))
499                 return 0;
500         }
501         else {
502             PyErr_SetString(PyExc_ValueError,
503                             "None disallowed in statement list");
504             return 0;
505         }
506     }
507     return 1;
508 }
509 
510 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)511 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
512 {
513     Py_ssize_t i;
514     for (i = 0; i < asdl_seq_LEN(exprs); i++) {
515         expr_ty expr = asdl_seq_GET(exprs, i);
516         if (expr) {
517             if (!validate_expr(expr, ctx))
518                 return 0;
519         }
520         else if (!null_ok) {
521             PyErr_SetString(PyExc_ValueError,
522                             "None disallowed in expression list");
523             return 0;
524         }
525 
526     }
527     return 1;
528 }
529 
530 int
PyAST_Validate(mod_ty mod)531 PyAST_Validate(mod_ty mod)
532 {
533     int res = 0;
534 
535     switch (mod->kind) {
536     case Module_kind:
537         res = validate_stmts(mod->v.Module.body);
538         break;
539     case Interactive_kind:
540         res = validate_stmts(mod->v.Interactive.body);
541         break;
542     case Expression_kind:
543         res = validate_expr(mod->v.Expression.body, Load);
544         break;
545     default:
546         PyErr_SetString(PyExc_SystemError, "impossible module node");
547         res = 0;
548         break;
549     }
550     return res;
551 }
552 
553 /* This is done here, so defines like "test" don't interfere with AST use above. */
554 #include "grammar.h"
555 #include "parsetok.h"
556 #include "graminit.h"
557 
/* Data structure used internally: the state passed (as `c`) to the
   CST-to-AST conversion helpers in this file. */
struct compiling {
    PyArena *c_arena; /* Arena for allocating memory. */
    PyObject *c_filename; /* filename (stored as a borrowed reference by
                             PyAST_FromNodeObject) */
    PyObject *c_normalize; /* Normalization function from unicodedata;
                              NULL until init_normalization() sets it. */
    int c_feature_version; /* Latest minor version of Python for allowed features */
};
565 
/* Forward declarations for the static CST-to-AST conversion helpers. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);

/* NOTE(review): the trailing bool presumably selects the async variant of
   the statement -- confirm against the definitions. */
static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);

/* Note different signature for ast_for_call */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
                            const node *, const node *, const node *);

static PyObject *parsenumber(struct compiling *, const char *);
static expr_ty parsestrplus(struct compiling *, const node *n);
static void get_last_end_pos(asdl_seq *, int *, int *);

/* NOTE(review): these look like selectors for the kind of comprehension
   to build (generator expression, list, set) -- confirm at the use site. */
#define COMP_GENEXP   0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP  2
589 
590 static int
init_normalization(struct compiling * c)591 init_normalization(struct compiling *c)
592 {
593     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
594     if (!m)
595         return 0;
596     c->c_normalize = PyObject_GetAttrString(m, "normalize");
597     Py_DECREF(m);
598     if (!c->c_normalize)
599         return 0;
600     return 1;
601 }
602 
603 static identifier
new_identifier(const char * n,struct compiling * c)604 new_identifier(const char *n, struct compiling *c)
605 {
606     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
607     if (!id)
608         return NULL;
609     /* PyUnicode_DecodeUTF8 should always return a ready string. */
610     assert(PyUnicode_IS_READY(id));
611     /* Check whether there are non-ASCII characters in the
612        identifier; if so, normalize to NFKC. */
613     if (!PyUnicode_IS_ASCII(id)) {
614         PyObject *id2;
615         if (!c->c_normalize && !init_normalization(c)) {
616             Py_DECREF(id);
617             return NULL;
618         }
619         PyObject *form = PyUnicode_InternFromString("NFKC");
620         if (form == NULL) {
621             Py_DECREF(id);
622             return NULL;
623         }
624         PyObject *args[2] = {form, id};
625         id2 = _PyObject_FastCall(c->c_normalize, args, 2);
626         Py_DECREF(id);
627         Py_DECREF(form);
628         if (!id2)
629             return NULL;
630         if (!PyUnicode_Check(id2)) {
631             PyErr_Format(PyExc_TypeError,
632                          "unicodedata.normalize() must return a string, not "
633                          "%.200s",
634                          _PyType_Name(Py_TYPE(id2)));
635             Py_DECREF(id2);
636             return NULL;
637         }
638         id = id2;
639     }
640     PyUnicode_InternInPlace(&id);
641     if (PyArena_AddPyObject(c->c_arena, id) < 0) {
642         Py_DECREF(id);
643         return NULL;
644     }
645     return id;
646 }
647 
648 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
649 
650 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)651 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
652 {
653     PyObject *value, *errstr, *loc, *tmp;
654     va_list va;
655 
656     va_start(va, errmsg);
657     errstr = PyUnicode_FromFormatV(errmsg, va);
658     va_end(va);
659     if (!errstr) {
660         return 0;
661     }
662     loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
663     if (!loc) {
664         Py_INCREF(Py_None);
665         loc = Py_None;
666     }
667     tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
668     if (!tmp) {
669         Py_DECREF(errstr);
670         return 0;
671     }
672     value = PyTuple_Pack(2, errstr, tmp);
673     Py_DECREF(errstr);
674     Py_DECREF(tmp);
675     if (value) {
676         PyErr_SetObject(PyExc_SyntaxError, value);
677         Py_DECREF(value);
678     }
679     return 0;
680 }
681 
682 /* num_stmts() returns number of contained statements.
683 
684    Use this routine to determine how big a sequence is needed for
685    the statements in a parse tree.  Its raison d'etre is this bit of
686    grammar:
687 
688    stmt: simple_stmt | compound_stmt
689    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
690 
691    A simple_stmt can contain multiple small_stmt elements joined
692    by semicolons.  If the arg is a simple_stmt, the number of
693    small_stmt elements is returned.
694 */
695 
696 static string
new_type_comment(const char * s,struct compiling * c)697 new_type_comment(const char *s, struct compiling *c)
698 {
699     PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
700     if (res == NULL)
701         return NULL;
702     if (PyArena_AddPyObject(c->c_arena, res) < 0) {
703         Py_DECREF(res);
704         return NULL;
705     }
706     return res;
707 }
708 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
709 
710 static int
num_stmts(const node * n)711 num_stmts(const node *n)
712 {
713     int i, l;
714     node *ch;
715 
716     switch (TYPE(n)) {
717         case single_input:
718             if (TYPE(CHILD(n, 0)) == NEWLINE)
719                 return 0;
720             else
721                 return num_stmts(CHILD(n, 0));
722         case file_input:
723             l = 0;
724             for (i = 0; i < NCH(n); i++) {
725                 ch = CHILD(n, i);
726                 if (TYPE(ch) == stmt)
727                     l += num_stmts(ch);
728             }
729             return l;
730         case stmt:
731             return num_stmts(CHILD(n, 0));
732         case compound_stmt:
733             return 1;
734         case simple_stmt:
735             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
736         case suite:
737         case func_body_suite:
738             /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
739             /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
740             if (NCH(n) == 1)
741                 return num_stmts(CHILD(n, 0));
742             else {
743                 i = 2;
744                 l = 0;
745                 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
746                     i += 2;
747                 for (; i < (NCH(n) - 1); i++)
748                     l += num_stmts(CHILD(n, i));
749                 return l;
750             }
751         default: {
752             _Py_FatalErrorFormat(__func__, "Non-statement found: %d %d",
753                                  TYPE(n), NCH(n));
754         }
755     }
756     Py_UNREACHABLE();
757 }
758 
759 /* Transform the CST rooted at node * to the appropriate AST
760 */
761 
762 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)763 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
764                      PyObject *filename, PyArena *arena)
765 {
766     int i, j, k, num;
767     asdl_seq *stmts = NULL;
768     asdl_seq *type_ignores = NULL;
769     stmt_ty s;
770     node *ch;
771     struct compiling c;
772     mod_ty res = NULL;
773     asdl_seq *argtypes = NULL;
774     expr_ty ret, arg;
775 
776     c.c_arena = arena;
777     /* borrowed reference */
778     c.c_filename = filename;
779     c.c_normalize = NULL;
780     c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
781         flags->cf_feature_version : PY_MINOR_VERSION;
782 
783     if (TYPE(n) == encoding_decl)
784         n = CHILD(n, 0);
785 
786     k = 0;
787     switch (TYPE(n)) {
788         case file_input:
789             stmts = _Py_asdl_seq_new(num_stmts(n), arena);
790             if (!stmts)
791                 goto out;
792             for (i = 0; i < NCH(n) - 1; i++) {
793                 ch = CHILD(n, i);
794                 if (TYPE(ch) == NEWLINE)
795                     continue;
796                 REQ(ch, stmt);
797                 num = num_stmts(ch);
798                 if (num == 1) {
799                     s = ast_for_stmt(&c, ch);
800                     if (!s)
801                         goto out;
802                     asdl_seq_SET(stmts, k++, s);
803                 }
804                 else {
805                     ch = CHILD(ch, 0);
806                     REQ(ch, simple_stmt);
807                     for (j = 0; j < num; j++) {
808                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
809                         if (!s)
810                             goto out;
811                         asdl_seq_SET(stmts, k++, s);
812                     }
813                 }
814             }
815 
816             /* Type ignores are stored under the ENDMARKER in file_input. */
817             ch = CHILD(n, NCH(n) - 1);
818             REQ(ch, ENDMARKER);
819             num = NCH(ch);
820             type_ignores = _Py_asdl_seq_new(num, arena);
821             if (!type_ignores)
822                 goto out;
823 
824             for (i = 0; i < num; i++) {
825                 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
826                 if (!type_comment)
827                     goto out;
828                 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
829                 if (!ti)
830                    goto out;
831                asdl_seq_SET(type_ignores, i, ti);
832             }
833 
834             res = Module(stmts, type_ignores, arena);
835             break;
836         case eval_input: {
837             expr_ty testlist_ast;
838 
839             /* XXX Why not comp_for here? */
840             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
841             if (!testlist_ast)
842                 goto out;
843             res = Expression(testlist_ast, arena);
844             break;
845         }
846         case single_input:
847             if (TYPE(CHILD(n, 0)) == NEWLINE) {
848                 stmts = _Py_asdl_seq_new(1, arena);
849                 if (!stmts)
850                     goto out;
851                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
852                                             n->n_end_lineno, n->n_end_col_offset,
853                                             arena));
854                 if (!asdl_seq_GET(stmts, 0))
855                     goto out;
856                 res = Interactive(stmts, arena);
857             }
858             else {
859                 n = CHILD(n, 0);
860                 num = num_stmts(n);
861                 stmts = _Py_asdl_seq_new(num, arena);
862                 if (!stmts)
863                     goto out;
864                 if (num == 1) {
865                     s = ast_for_stmt(&c, n);
866                     if (!s)
867                         goto out;
868                     asdl_seq_SET(stmts, 0, s);
869                 }
870                 else {
871                     /* Only a simple_stmt can contain multiple statements. */
872                     REQ(n, simple_stmt);
873                     for (i = 0; i < NCH(n); i += 2) {
874                         if (TYPE(CHILD(n, i)) == NEWLINE)
875                             break;
876                         s = ast_for_stmt(&c, CHILD(n, i));
877                         if (!s)
878                             goto out;
879                         asdl_seq_SET(stmts, i / 2, s);
880                     }
881                 }
882 
883                 res = Interactive(stmts, arena);
884             }
885             break;
886         case func_type_input:
887             n = CHILD(n, 0);
888             REQ(n, func_type);
889 
890             if (TYPE(CHILD(n, 1)) == typelist) {
891                 ch = CHILD(n, 1);
892                 /* this is overly permissive -- we don't pay any attention to
893                  * stars on the args -- just parse them into an ordered list */
894                 num = 0;
895                 for (i = 0; i < NCH(ch); i++) {
896                     if (TYPE(CHILD(ch, i)) == test) {
897                         num++;
898                     }
899                 }
900 
901                 argtypes = _Py_asdl_seq_new(num, arena);
902                 if (!argtypes)
903                     goto out;
904 
905                 j = 0;
906                 for (i = 0; i < NCH(ch); i++) {
907                     if (TYPE(CHILD(ch, i)) == test) {
908                         arg = ast_for_expr(&c, CHILD(ch, i));
909                         if (!arg)
910                             goto out;
911                         asdl_seq_SET(argtypes, j++, arg);
912                     }
913                 }
914             }
915             else {
916                 argtypes = _Py_asdl_seq_new(0, arena);
917                 if (!argtypes)
918                     goto out;
919             }
920 
921             ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
922             if (!ret)
923                 goto out;
924             res = FunctionType(argtypes, ret, arena);
925             break;
926         default:
927             PyErr_Format(PyExc_SystemError,
928                          "invalid node %d for PyAST_FromNode", TYPE(n));
929             goto out;
930     }
931  out:
932     if (c.c_normalize) {
933         Py_DECREF(c.c_normalize);
934     }
935     return res;
936 }
937 
938 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)939 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
940                PyArena *arena)
941 {
942     mod_ty mod;
943     PyObject *filename;
944     filename = PyUnicode_DecodeFSDefault(filename_str);
945     if (filename == NULL)
946         return NULL;
947     mod = PyAST_FromNodeObject(n, flags, filename, arena);
948     Py_DECREF(filename);
949     return mod;
950 
951 }
952 
953 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
954 */
955 
956 static operator_ty
get_operator(struct compiling * c,const node * n)957 get_operator(struct compiling *c, const node *n)
958 {
959     switch (TYPE(n)) {
960         case VBAR:
961             return BitOr;
962         case CIRCUMFLEX:
963             return BitXor;
964         case AMPER:
965             return BitAnd;
966         case LEFTSHIFT:
967             return LShift;
968         case RIGHTSHIFT:
969             return RShift;
970         case PLUS:
971             return Add;
972         case MINUS:
973             return Sub;
974         case STAR:
975             return Mult;
976         case AT:
977             if (c->c_feature_version < 5) {
978                 ast_error(c, n,
979                           "The '@' operator is only supported in Python 3.5 and greater");
980                 return (operator_ty)0;
981             }
982             return MatMult;
983         case SLASH:
984             return Div;
985         case DOUBLESLASH:
986             return FloorDiv;
987         case PERCENT:
988             return Mod;
989         default:
990             return (operator_ty)0;
991     }
992 }
993 
994 static const char * const FORBIDDEN[] = {
995     "None",
996     "True",
997     "False",
998     "__debug__",
999     NULL,
1000 };
1001 
1002 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1003 forbidden_name(struct compiling *c, identifier name, const node *n,
1004                int full_checks)
1005 {
1006     assert(PyUnicode_Check(name));
1007     const char * const *p = FORBIDDEN;
1008     if (!full_checks) {
1009         /* In most cases, the parser will protect True, False, and None
1010            from being assign to. */
1011         p += 3;
1012     }
1013     for (; *p; p++) {
1014         if (_PyUnicode_EqualToASCIIString(name, *p)) {
1015             ast_error(c, n, "cannot assign to %U", name);
1016             return 1;
1017         }
1018     }
1019     return 0;
1020 }
1021 
1022 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1023 copy_location(expr_ty e, const node *n, const node *end)
1024 {
1025     if (e) {
1026         e->lineno = LINENO(n);
1027         e->col_offset = n->n_col_offset;
1028         e->end_lineno = end->n_end_lineno;
1029         e->end_col_offset = end->n_end_col_offset;
1030     }
1031     return e;
1032 }
1033 
1034 static const char *
get_expr_name(expr_ty e)1035 get_expr_name(expr_ty e)
1036 {
1037     switch (e->kind) {
1038         case Attribute_kind:
1039             return "attribute";
1040         case Subscript_kind:
1041             return "subscript";
1042         case Starred_kind:
1043             return "starred";
1044         case Name_kind:
1045             return "name";
1046         case List_kind:
1047             return "list";
1048         case Tuple_kind:
1049             return "tuple";
1050         case Lambda_kind:
1051             return "lambda";
1052         case Call_kind:
1053             return "function call";
1054         case BoolOp_kind:
1055         case BinOp_kind:
1056         case UnaryOp_kind:
1057             return "operator";
1058         case GeneratorExp_kind:
1059             return "generator expression";
1060         case Yield_kind:
1061         case YieldFrom_kind:
1062             return "yield expression";
1063         case Await_kind:
1064             return "await expression";
1065         case ListComp_kind:
1066             return "list comprehension";
1067         case SetComp_kind:
1068             return "set comprehension";
1069         case DictComp_kind:
1070             return "dict comprehension";
1071         case Dict_kind:
1072             return "dict display";
1073         case Set_kind:
1074             return "set display";
1075         case JoinedStr_kind:
1076         case FormattedValue_kind:
1077             return "f-string expression";
1078         case Constant_kind: {
1079             PyObject *value = e->v.Constant.value;
1080             if (value == Py_None) {
1081                 return "None";
1082             }
1083             if (value == Py_False) {
1084                 return "False";
1085             }
1086             if (value == Py_True) {
1087                 return "True";
1088             }
1089             if (value == Py_Ellipsis) {
1090                 return "Ellipsis";
1091             }
1092             return "literal";
1093         }
1094         case Compare_kind:
1095             return "comparison";
1096         case IfExp_kind:
1097             return "conditional expression";
1098         case NamedExpr_kind:
1099             return "named expression";
1100         default:
1101             PyErr_Format(PyExc_SystemError,
1102                          "unexpected expression in assignment %d (line %d)",
1103                          e->kind, e->lineno);
1104             return NULL;
1105     }
1106 }
1107 
1108 /* Set the context ctx for expr_ty e, recursively traversing e.
1109 
1110    Only sets context for expr kinds that "can appear in assignment context"
1111    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
1112    an appropriate syntax error and returns false.
1113 */
1114 
1115 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1116 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1117 {
1118     asdl_seq *s = NULL;
1119 
1120     /* Expressions in an augmented assignment have a Store context. */
1121 
1122     switch (e->kind) {
1123         case Attribute_kind:
1124             e->v.Attribute.ctx = ctx;
1125             if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1126                 return 0;
1127             break;
1128         case Subscript_kind:
1129             e->v.Subscript.ctx = ctx;
1130             break;
1131         case Starred_kind:
1132             e->v.Starred.ctx = ctx;
1133             if (!set_context(c, e->v.Starred.value, ctx, n))
1134                 return 0;
1135             break;
1136         case Name_kind:
1137             if (ctx == Store) {
1138                 if (forbidden_name(c, e->v.Name.id, n, 0))
1139                     return 0; /* forbidden_name() calls ast_error() */
1140             }
1141             e->v.Name.ctx = ctx;
1142             break;
1143         case List_kind:
1144             e->v.List.ctx = ctx;
1145             s = e->v.List.elts;
1146             break;
1147         case Tuple_kind:
1148             e->v.Tuple.ctx = ctx;
1149             s = e->v.Tuple.elts;
1150             break;
1151         default: {
1152             const char *expr_name = get_expr_name(e);
1153             if (expr_name != NULL) {
1154                 ast_error(c, n, "cannot %s %s",
1155                           ctx == Store ? "assign to" : "delete",
1156                           expr_name);
1157             }
1158             return 0;
1159         }
1160     }
1161 
1162     /* If the LHS is a list or tuple, we need to set the assignment
1163        context for all the contained elements.
1164     */
1165     if (s) {
1166         Py_ssize_t i;
1167 
1168         for (i = 0; i < asdl_seq_LEN(s); i++) {
1169             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1170                 return 0;
1171         }
1172     }
1173     return 1;
1174 }
1175 
1176 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1177 ast_for_augassign(struct compiling *c, const node *n)
1178 {
1179     REQ(n, augassign);
1180     n = CHILD(n, 0);
1181     switch (STR(n)[0]) {
1182         case '+':
1183             return Add;
1184         case '-':
1185             return Sub;
1186         case '/':
1187             if (STR(n)[1] == '/')
1188                 return FloorDiv;
1189             else
1190                 return Div;
1191         case '%':
1192             return Mod;
1193         case '<':
1194             return LShift;
1195         case '>':
1196             return RShift;
1197         case '&':
1198             return BitAnd;
1199         case '^':
1200             return BitXor;
1201         case '|':
1202             return BitOr;
1203         case '*':
1204             if (STR(n)[1] == '*')
1205                 return Pow;
1206             else
1207                 return Mult;
1208         case '@':
1209             if (c->c_feature_version < 5) {
1210                 ast_error(c, n,
1211                           "The '@' operator is only supported in Python 3.5 and greater");
1212                 return (operator_ty)0;
1213             }
1214             return MatMult;
1215         default:
1216             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1217             return (operator_ty)0;
1218     }
1219 }
1220 
1221 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1222 ast_for_comp_op(struct compiling *c, const node *n)
1223 {
1224     /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1225                |'is' 'not'
1226     */
1227     REQ(n, comp_op);
1228     if (NCH(n) == 1) {
1229         n = CHILD(n, 0);
1230         switch (TYPE(n)) {
1231             case LESS:
1232                 return Lt;
1233             case GREATER:
1234                 return Gt;
1235             case EQEQUAL:                       /* == */
1236                 return Eq;
1237             case LESSEQUAL:
1238                 return LtE;
1239             case GREATEREQUAL:
1240                 return GtE;
1241             case NOTEQUAL:
1242                 return NotEq;
1243             case NAME:
1244                 if (strcmp(STR(n), "in") == 0)
1245                     return In;
1246                 if (strcmp(STR(n), "is") == 0)
1247                     return Is;
1248                 /* fall through */
1249             default:
1250                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1251                              STR(n));
1252                 return (cmpop_ty)0;
1253         }
1254     }
1255     else if (NCH(n) == 2) {
1256         /* handle "not in" and "is not" */
1257         switch (TYPE(CHILD(n, 0))) {
1258             case NAME:
1259                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1260                     return NotIn;
1261                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1262                     return IsNot;
1263                 /* fall through */
1264             default:
1265                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1266                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1267                 return (cmpop_ty)0;
1268         }
1269     }
1270     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1271                  NCH(n));
1272     return (cmpop_ty)0;
1273 }
1274 
1275 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1276 seq_for_testlist(struct compiling *c, const node *n)
1277 {
1278     /* testlist: test (',' test)* [',']
1279        testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1280     */
1281     asdl_seq *seq;
1282     expr_ty expression;
1283     int i;
1284     assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1285 
1286     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1287     if (!seq)
1288         return NULL;
1289 
1290     for (i = 0; i < NCH(n); i += 2) {
1291         const node *ch = CHILD(n, i);
1292         assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1293 
1294         expression = ast_for_expr(c, ch);
1295         if (!expression)
1296             return NULL;
1297 
1298         assert(i / 2 < seq->size);
1299         asdl_seq_SET(seq, i / 2, expression);
1300     }
1301     return seq;
1302 }
1303 
1304 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1305 ast_for_arg(struct compiling *c, const node *n)
1306 {
1307     identifier name;
1308     expr_ty annotation = NULL;
1309     node *ch;
1310     arg_ty ret;
1311 
1312     assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1313     ch = CHILD(n, 0);
1314     name = NEW_IDENTIFIER(ch);
1315     if (!name)
1316         return NULL;
1317     if (forbidden_name(c, name, ch, 0))
1318         return NULL;
1319 
1320     if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1321         annotation = ast_for_expr(c, CHILD(n, 2));
1322         if (!annotation)
1323             return NULL;
1324     }
1325 
1326     ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1327               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1328     if (!ret)
1329         return NULL;
1330     return ret;
1331 }
1332 
1333 /* returns -1 if failed to handle keyword only arguments
1334    returns new position to keep processing if successful
1335                (',' tfpdef ['=' test])*
1336                      ^^^
1337    start pointing here
1338  */
1339 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1340 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1341                         asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1342 {
1343     PyObject *argname;
1344     node *ch;
1345     expr_ty expression, annotation;
1346     arg_ty arg = NULL;
1347     int i = start;
1348     int j = 0; /* index for kwdefaults and kwonlyargs */
1349 
1350     if (kwonlyargs == NULL) {
1351         ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1352         return -1;
1353     }
1354     assert(kwdefaults != NULL);
1355     while (i < NCH(n)) {
1356         ch = CHILD(n, i);
1357         switch (TYPE(ch)) {
1358             case vfpdef:
1359             case tfpdef:
1360                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1361                     expression = ast_for_expr(c, CHILD(n, i + 2));
1362                     if (!expression)
1363                         goto error;
1364                     asdl_seq_SET(kwdefaults, j, expression);
1365                     i += 2; /* '=' and test */
1366                 }
1367                 else { /* setting NULL if no default value exists */
1368                     asdl_seq_SET(kwdefaults, j, NULL);
1369                 }
1370                 if (NCH(ch) == 3) {
1371                     /* ch is NAME ':' test */
1372                     annotation = ast_for_expr(c, CHILD(ch, 2));
1373                     if (!annotation)
1374                         goto error;
1375                 }
1376                 else {
1377                     annotation = NULL;
1378                 }
1379                 ch = CHILD(ch, 0);
1380                 argname = NEW_IDENTIFIER(ch);
1381                 if (!argname)
1382                     goto error;
1383                 if (forbidden_name(c, argname, ch, 0))
1384                     goto error;
1385                 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1386                           ch->n_end_lineno, ch->n_end_col_offset,
1387                           c->c_arena);
1388                 if (!arg)
1389                     goto error;
1390                 asdl_seq_SET(kwonlyargs, j++, arg);
1391                 i += 1; /* the name */
1392                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1393                     i += 1; /* the comma, if present */
1394                 break;
1395             case TYPE_COMMENT:
1396                 /* arg will be equal to the last argument processed */
1397                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1398                 if (!arg->type_comment)
1399                     goto error;
1400                 i += 1;
1401                 break;
1402             case DOUBLESTAR:
1403                 return i;
1404             default:
1405                 ast_error(c, ch, "unexpected node");
1406                 goto error;
1407         }
1408     }
1409     return i;
1410  error:
1411     return -1;
1412 }
1413 
1414 /* Create AST for argument list. */
1415 
1416 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1417 ast_for_arguments(struct compiling *c, const node *n)
1418 {
1419     /* This function handles both typedargslist (function definition)
1420        and varargslist (lambda definition).
1421 
1422        parameters: '(' [typedargslist] ')'
1423 
1424        The following definition for typedarglist is equivalent to this set of rules:
1425 
1426          arguments = argument (',' [TYPE_COMMENT] argument)*
1427          argument = tfpdef ['=' test]
1428          kwargs = '**' tfpdef [','] [TYPE_COMMENT]
1429          args = '*' [tfpdef]
1430          kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
1431                          [TYPE_COMMENT] [kwargs]])
1432          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1433          poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
1434                                          [TYPE_COMMENT] [args_kwonly_kwargs]])
1435          typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1436          typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
1437                         typedargslist_no_posonly]])|(typedargslist_no_posonly)"
1438 
1439        typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1440            ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
1441            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1442            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1443            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1444            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1445            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1446            '**' tfpdef [','] [TYPE_COMMENT]]] ) |  (tfpdef ['=' test] (','
1447            [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1448            [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1449            [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1450            [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1451            (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1452            '**' tfpdef [','] [TYPE_COMMENT]))
1453 
1454        tfpdef: NAME [':' test]
1455 
1456        The following definition for varargslist is equivalent to this set of rules:
1457 
1458          arguments = argument (',' argument )*
1459          argument = vfpdef ['=' test]
1460          kwargs = '**' vfpdef [',']
1461          args = '*' [vfpdef]
1462          kwonly_kwargs = (',' argument )* [',' [kwargs]]
1463          args_kwonly_kwargs = args kwonly_kwargs | kwargs
1464          poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
1465          vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1466          varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
1467                        (vararglist_no_posonly)
1468 
1469        varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
1470            test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
1471            ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
1472            [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
1473            ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1474            | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
1475            [',']]] | '**' vfpdef [','])
1476 
1477        vfpdef: NAME
1478 
1479     */
1480     int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
1481     int nposdefaults = 0, found_default = 0;
1482     asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1483     arg_ty vararg = NULL, kwarg = NULL;
1484     arg_ty arg = NULL;
1485     node *ch;
1486 
1487     if (TYPE(n) == parameters) {
1488         if (NCH(n) == 2) /* () as argument list */
1489             return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1490         n = CHILD(n, 1);
1491     }
1492     assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1493 
1494     /* First count the number of positional args & defaults.  The
1495        variable i is the loop index for this for loop and the next.
1496        The next loop picks up where the first leaves off.
1497     */
1498     for (i = 0; i < NCH(n); i++) {
1499         ch = CHILD(n, i);
1500         if (TYPE(ch) == STAR) {
1501             /* skip star */
1502             i++;
1503             if (i < NCH(n) && /* skip argument following star */
1504                 (TYPE(CHILD(n, i)) == tfpdef ||
1505                  TYPE(CHILD(n, i)) == vfpdef)) {
1506                 i++;
1507             }
1508             break;
1509         }
1510         if (TYPE(ch) == DOUBLESTAR) break;
1511         if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1512         if (TYPE(ch) == EQUAL) nposdefaults++;
1513         if (TYPE(ch) == SLASH ) {
1514             nposonlyargs = nposargs;
1515             nposargs = 0;
1516         }
1517     }
1518     /* count the number of keyword only args &
1519        defaults for keyword only args */
1520     for ( ; i < NCH(n); ++i) {
1521         ch = CHILD(n, i);
1522         if (TYPE(ch) == DOUBLESTAR) break;
1523         if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1524     }
1525     posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
1526     if (!posonlyargs && nposonlyargs) {
1527         return NULL;
1528     }
1529     posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1530     if (!posargs && nposargs)
1531         return NULL;
1532     kwonlyargs = (nkwonlyargs ?
1533                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1534     if (!kwonlyargs && nkwonlyargs)
1535         return NULL;
1536     posdefaults = (nposdefaults ?
1537                     _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1538     if (!posdefaults && nposdefaults)
1539         return NULL;
1540     /* The length of kwonlyargs and kwdefaults are same
1541        since we set NULL as default for keyword only argument w/o default
1542        - we have sequence data structure, but no dictionary */
1543     kwdefaults = (nkwonlyargs ?
1544                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1545     if (!kwdefaults && nkwonlyargs)
1546         return NULL;
1547 
1548     /* tfpdef: NAME [':' test]
1549        vfpdef: NAME
1550     */
1551     i = 0;
1552     j = 0;  /* index for defaults */
1553     k = 0;  /* index for args */
1554     l = 0;  /* index for posonlyargs */
1555     while (i < NCH(n)) {
1556         ch = CHILD(n, i);
1557         switch (TYPE(ch)) {
1558             case tfpdef:
1559             case vfpdef:
1560                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1561                    anything other than EQUAL or a comma? */
1562                 /* XXX Should NCH(n) check be made a separate check? */
1563                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1564                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1565                     if (!expression)
1566                         return NULL;
1567                     assert(posdefaults != NULL);
1568                     asdl_seq_SET(posdefaults, j++, expression);
1569                     i += 2;
1570                     found_default = 1;
1571                 }
1572                 else if (found_default) {
1573                     ast_error(c, n,
1574                               "non-default argument follows default argument");
1575                     return NULL;
1576                 }
1577                 arg = ast_for_arg(c, ch);
1578                 if (!arg)
1579                     return NULL;
1580                 if (l < nposonlyargs) {
1581                     asdl_seq_SET(posonlyargs, l++, arg);
1582                 } else {
1583                     asdl_seq_SET(posargs, k++, arg);
1584                 }
1585                 i += 1; /* the name */
1586                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1587                     i += 1; /* the comma, if present */
1588                 break;
1589              case SLASH:
1590                 /* Advance the slash and the comma. If there are more names
1591                  * after the slash there will be a comma so we are advancing
1592                  * the correct number of nodes. If the slash is the last item,
1593                  * we will be advancing an extra token but then * i > NCH(n)
1594                  * and the enclosing while will finish correctly. */
1595                 i += 2;
1596                 break;
1597             case STAR:
1598                 if (i+1 >= NCH(n) ||
1599                     (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
1600                                        || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
1601                     ast_error(c, CHILD(n, i),
1602                               "named arguments must follow bare *");
1603                     return NULL;
1604                 }
1605                 ch = CHILD(n, i+1);  /* tfpdef or COMMA */
1606                 if (TYPE(ch) == COMMA) {
1607                     int res = 0;
1608                     i += 2; /* now follows keyword only arguments */
1609 
1610                     if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1611                         ast_error(c, CHILD(n, i),
1612                                   "bare * has associated type comment");
1613                         return NULL;
1614                     }
1615 
1616                     res = handle_keywordonly_args(c, n, i,
1617                                                   kwonlyargs, kwdefaults);
1618                     if (res == -1) return NULL;
1619                     i = res; /* res has new position to process */
1620                 }
1621                 else {
1622                     vararg = ast_for_arg(c, ch);
1623                     if (!vararg)
1624                         return NULL;
1625 
1626                 i += 2; /* the star and the name */
1627                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1628                     i += 1; /* the comma, if present */
1629 
1630                 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1631                         vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
1632                         if (!vararg->type_comment)
1633                             return NULL;
1634                         i += 1;
1635                     }
1636 
1637                     if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1638                                     || TYPE(CHILD(n, i)) == vfpdef)) {
1639                         int res = 0;
1640                         res = handle_keywordonly_args(c, n, i,
1641                                                       kwonlyargs, kwdefaults);
1642                         if (res == -1) return NULL;
1643                         i = res; /* res has new position to process */
1644                     }
1645                 }
1646                 break;
1647             case DOUBLESTAR:
1648                 ch = CHILD(n, i+1);  /* tfpdef */
1649                 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1650                 kwarg = ast_for_arg(c, ch);
1651                 if (!kwarg)
1652                     return NULL;
1653                 i += 2; /* the double star and the name */
1654                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1655                     i += 1; /* the comma, if present */
1656                 break;
1657             case TYPE_COMMENT:
1658                 assert(i);
1659 
1660                 if (kwarg)
1661                     arg = kwarg;
1662 
1663                 /* arg will be equal to the last argument processed */
1664                 arg->type_comment = NEW_TYPE_COMMENT(ch);
1665                 if (!arg->type_comment)
1666                     return NULL;
1667                 i += 1;
1668                 break;
1669             default:
1670                 PyErr_Format(PyExc_SystemError,
1671                              "unexpected node in varargslist: %d @ %d",
1672                              TYPE(ch), i);
1673                 return NULL;
1674         }
1675     }
1676     return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1677 }
1678 
1679 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1680 ast_for_decorator(struct compiling *c, const node *n)
1681 {
1682     /* decorator: '@' namedexpr_test NEWLINE */
1683 
1684     REQ(n, decorator);
1685     REQ(CHILD(n, 0), AT);
1686     REQ(CHILD(n, 2), NEWLINE);
1687 
1688     return ast_for_expr(c, CHILD(n, 1));
1689 }
1690 
1691 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1692 ast_for_decorators(struct compiling *c, const node *n)
1693 {
1694     asdl_seq* decorator_seq;
1695     expr_ty d;
1696     int i;
1697 
1698     REQ(n, decorators);
1699     decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1700     if (!decorator_seq)
1701         return NULL;
1702 
1703     for (i = 0; i < NCH(n); i++) {
1704         d = ast_for_decorator(c, CHILD(n, i));
1705         if (!d)
1706             return NULL;
1707         asdl_seq_SET(decorator_seq, i, d);
1708     }
1709     return decorator_seq;
1710 }
1711 
1712 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1713 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1714                      asdl_seq *decorator_seq, bool is_async)
1715 {
1716     /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1717     const node * const n = is_async ? CHILD(n0, 1) : n0;
1718     identifier name;
1719     arguments_ty args;
1720     asdl_seq *body;
1721     expr_ty returns = NULL;
1722     int name_i = 1;
1723     int end_lineno, end_col_offset;
1724     node *tc;
1725     string type_comment = NULL;
1726 
1727     if (is_async && c->c_feature_version < 5) {
1728         ast_error(c, n,
1729                   "Async functions are only supported in Python 3.5 and greater");
1730         return NULL;
1731     }
1732 
1733     REQ(n, funcdef);
1734 
1735     name = NEW_IDENTIFIER(CHILD(n, name_i));
1736     if (!name)
1737         return NULL;
1738     if (forbidden_name(c, name, CHILD(n, name_i), 0))
1739         return NULL;
1740     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1741     if (!args)
1742         return NULL;
1743     if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1744         returns = ast_for_expr(c, CHILD(n, name_i + 3));
1745         if (!returns)
1746             return NULL;
1747         name_i += 2;
1748     }
1749     if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1750         type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1751         if (!type_comment)
1752             return NULL;
1753         name_i += 1;
1754     }
1755     body = ast_for_suite(c, CHILD(n, name_i + 3));
1756     if (!body)
1757         return NULL;
1758     get_last_end_pos(body, &end_lineno, &end_col_offset);
1759 
1760     if (NCH(CHILD(n, name_i + 3)) > 1) {
1761         /* Check if the suite has a type comment in it. */
1762         tc = CHILD(CHILD(n, name_i + 3), 1);
1763 
1764         if (TYPE(tc) == TYPE_COMMENT) {
1765             if (type_comment != NULL) {
1766                 ast_error(c, n, "Cannot have two type comments on def");
1767                 return NULL;
1768             }
1769             type_comment = NEW_TYPE_COMMENT(tc);
1770             if (!type_comment)
1771                 return NULL;
1772         }
1773     }
1774 
1775     if (is_async)
1776         return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1777                                 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1778     else
1779         return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1780                            LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1781 }
1782 
1783 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1784 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1785 {
1786     /* async_funcdef: ASYNC funcdef */
1787     REQ(n, async_funcdef);
1788     REQ(CHILD(n, 0), ASYNC);
1789     REQ(CHILD(n, 1), funcdef);
1790 
1791     return ast_for_funcdef_impl(c, n, decorator_seq,
1792                                 true /* is_async */);
1793 }
1794 
1795 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1796 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1797 {
1798     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1799     return ast_for_funcdef_impl(c, n, decorator_seq,
1800                                 false /* is_async */);
1801 }
1802 
1803 
1804 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1805 ast_for_async_stmt(struct compiling *c, const node *n)
1806 {
1807     /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1808     REQ(n, async_stmt);
1809     REQ(CHILD(n, 0), ASYNC);
1810 
1811     switch (TYPE(CHILD(n, 1))) {
1812         case funcdef:
1813             return ast_for_funcdef_impl(c, n, NULL,
1814                                         true /* is_async */);
1815         case with_stmt:
1816             return ast_for_with_stmt(c, n,
1817                                      true /* is_async */);
1818 
1819         case for_stmt:
1820             return ast_for_for_stmt(c, n,
1821                                     true /* is_async */);
1822 
1823         default:
1824             PyErr_Format(PyExc_SystemError,
1825                          "invalid async stament: %s",
1826                          STR(CHILD(n, 1)));
1827             return NULL;
1828     }
1829 }
1830 
1831 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1832 ast_for_decorated(struct compiling *c, const node *n)
1833 {
1834     /* decorated: decorators (classdef | funcdef | async_funcdef) */
1835     stmt_ty thing = NULL;
1836     asdl_seq *decorator_seq = NULL;
1837 
1838     REQ(n, decorated);
1839 
1840     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1841     if (!decorator_seq)
1842       return NULL;
1843 
1844     assert(TYPE(CHILD(n, 1)) == funcdef ||
1845            TYPE(CHILD(n, 1)) == async_funcdef ||
1846            TYPE(CHILD(n, 1)) == classdef);
1847 
1848     if (TYPE(CHILD(n, 1)) == funcdef) {
1849       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1850     } else if (TYPE(CHILD(n, 1)) == classdef) {
1851       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1852     } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1853       thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1854     }
1855     return thing;
1856 }
1857 
1858 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1859 ast_for_namedexpr(struct compiling *c, const node *n)
1860 {
1861     /* namedexpr_test: test [':=' test]
1862        argument: ( test [comp_for] |
1863             test ':=' test |
1864             test '=' test |
1865             '**' test |
1866             '*' test )
1867     */
1868     expr_ty target, value;
1869 
1870     target = ast_for_expr(c, CHILD(n, 0));
1871     if (!target)
1872         return NULL;
1873 
1874     value = ast_for_expr(c, CHILD(n, 2));
1875     if (!value)
1876         return NULL;
1877 
1878     if (target->kind != Name_kind) {
1879         const char *expr_name = get_expr_name(target);
1880         if (expr_name != NULL) {
1881             ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1882         }
1883         return NULL;
1884     }
1885 
1886     if (!set_context(c, target, Store, n))
1887         return NULL;
1888 
1889     return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1890                      n->n_end_col_offset, c->c_arena);
1891 }
1892 
1893 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1894 ast_for_lambdef(struct compiling *c, const node *n)
1895 {
1896     /* lambdef: 'lambda' [varargslist] ':' test
1897        lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1898     arguments_ty args;
1899     expr_ty expression;
1900 
1901     if (NCH(n) == 3) {
1902         args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1903         if (!args)
1904             return NULL;
1905         expression = ast_for_expr(c, CHILD(n, 2));
1906         if (!expression)
1907             return NULL;
1908     }
1909     else {
1910         args = ast_for_arguments(c, CHILD(n, 1));
1911         if (!args)
1912             return NULL;
1913         expression = ast_for_expr(c, CHILD(n, 3));
1914         if (!expression)
1915             return NULL;
1916     }
1917 
1918     return Lambda(args, expression, LINENO(n), n->n_col_offset,
1919                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1920 }
1921 
1922 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)1923 ast_for_ifexpr(struct compiling *c, const node *n)
1924 {
1925     /* test: or_test 'if' or_test 'else' test */
1926     expr_ty expression, body, orelse;
1927 
1928     assert(NCH(n) == 5);
1929     body = ast_for_expr(c, CHILD(n, 0));
1930     if (!body)
1931         return NULL;
1932     expression = ast_for_expr(c, CHILD(n, 2));
1933     if (!expression)
1934         return NULL;
1935     orelse = ast_for_expr(c, CHILD(n, 4));
1936     if (!orelse)
1937         return NULL;
1938     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1939                  n->n_end_lineno, n->n_end_col_offset,
1940                  c->c_arena);
1941 }
1942 
1943 /*
1944    Count the number of 'for' loops in a comprehension.
1945 
1946    Helper for ast_for_comprehension().
1947 */
1948 
1949 static int
count_comp_fors(struct compiling * c,const node * n)1950 count_comp_fors(struct compiling *c, const node *n)
1951 {
1952     int n_fors = 0;
1953 
1954   count_comp_for:
1955     n_fors++;
1956     REQ(n, comp_for);
1957     if (NCH(n) == 2) {
1958         REQ(CHILD(n, 0), ASYNC);
1959         n = CHILD(n, 1);
1960     }
1961     else if (NCH(n) == 1) {
1962         n = CHILD(n, 0);
1963     }
1964     else {
1965         goto error;
1966     }
1967     if (NCH(n) == (5)) {
1968         n = CHILD(n, 4);
1969     }
1970     else {
1971         return n_fors;
1972     }
1973   count_comp_iter:
1974     REQ(n, comp_iter);
1975     n = CHILD(n, 0);
1976     if (TYPE(n) == comp_for)
1977         goto count_comp_for;
1978     else if (TYPE(n) == comp_if) {
1979         if (NCH(n) == 3) {
1980             n = CHILD(n, 2);
1981             goto count_comp_iter;
1982         }
1983         else
1984             return n_fors;
1985     }
1986 
1987   error:
1988     /* Should never be reached */
1989     PyErr_SetString(PyExc_SystemError,
1990                     "logic error in count_comp_fors");
1991     return -1;
1992 }
1993 
1994 /* Count the number of 'if' statements in a comprehension.
1995 
1996    Helper for ast_for_comprehension().
1997 */
1998 
1999 static int
count_comp_ifs(struct compiling * c,const node * n)2000 count_comp_ifs(struct compiling *c, const node *n)
2001 {
2002     int n_ifs = 0;
2003 
2004     while (1) {
2005         REQ(n, comp_iter);
2006         if (TYPE(CHILD(n, 0)) == comp_for)
2007             return n_ifs;
2008         n = CHILD(n, 0);
2009         REQ(n, comp_if);
2010         n_ifs++;
2011         if (NCH(n) == 2)
2012             return n_ifs;
2013         n = CHILD(n, 2);
2014     }
2015 }
2016 
2017 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)2018 ast_for_comprehension(struct compiling *c, const node *n)
2019 {
2020     int i, n_fors;
2021     asdl_seq *comps;
2022 
2023     n_fors = count_comp_fors(c, n);
2024     if (n_fors == -1)
2025         return NULL;
2026 
2027     comps = _Py_asdl_seq_new(n_fors, c->c_arena);
2028     if (!comps)
2029         return NULL;
2030 
2031     for (i = 0; i < n_fors; i++) {
2032         comprehension_ty comp;
2033         asdl_seq *t;
2034         expr_ty expression, first;
2035         node *for_ch;
2036         node *sync_n;
2037         int is_async = 0;
2038 
2039         REQ(n, comp_for);
2040 
2041         if (NCH(n) == 2) {
2042             is_async = 1;
2043             REQ(CHILD(n, 0), ASYNC);
2044             sync_n = CHILD(n, 1);
2045         }
2046         else {
2047             sync_n = CHILD(n, 0);
2048         }
2049         REQ(sync_n, sync_comp_for);
2050 
2051         /* Async comprehensions only allowed in Python 3.6 and greater */
2052         if (is_async && c->c_feature_version < 6) {
2053             ast_error(c, n,
2054                       "Async comprehensions are only supported in Python 3.6 and greater");
2055             return NULL;
2056         }
2057 
2058         for_ch = CHILD(sync_n, 1);
2059         t = ast_for_exprlist(c, for_ch, Store);
2060         if (!t)
2061             return NULL;
2062         expression = ast_for_expr(c, CHILD(sync_n, 3));
2063         if (!expression)
2064             return NULL;
2065 
2066         /* Check the # of children rather than the length of t, since
2067            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
2068         first = (expr_ty)asdl_seq_GET(t, 0);
2069         if (NCH(for_ch) == 1)
2070             comp = comprehension(first, expression, NULL,
2071                                  is_async, c->c_arena);
2072         else
2073             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
2074                                        for_ch->n_end_lineno, for_ch->n_end_col_offset,
2075                                        c->c_arena),
2076                                  expression, NULL, is_async, c->c_arena);
2077         if (!comp)
2078             return NULL;
2079 
2080         if (NCH(sync_n) == 5) {
2081             int j, n_ifs;
2082             asdl_seq *ifs;
2083 
2084             n = CHILD(sync_n, 4);
2085             n_ifs = count_comp_ifs(c, n);
2086             if (n_ifs == -1)
2087                 return NULL;
2088 
2089             ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
2090             if (!ifs)
2091                 return NULL;
2092 
2093             for (j = 0; j < n_ifs; j++) {
2094                 REQ(n, comp_iter);
2095                 n = CHILD(n, 0);
2096                 REQ(n, comp_if);
2097 
2098                 expression = ast_for_expr(c, CHILD(n, 1));
2099                 if (!expression)
2100                     return NULL;
2101                 asdl_seq_SET(ifs, j, expression);
2102                 if (NCH(n) == 3)
2103                     n = CHILD(n, 2);
2104             }
2105             /* on exit, must guarantee that n is a comp_for */
2106             if (TYPE(n) == comp_iter)
2107                 n = CHILD(n, 0);
2108             comp->ifs = ifs;
2109         }
2110         asdl_seq_SET(comps, i, comp);
2111     }
2112     return comps;
2113 }
2114 
2115 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2116 ast_for_itercomp(struct compiling *c, const node *n, int type)
2117 {
2118     /* testlist_comp: (test|star_expr)
2119      *                ( comp_for | (',' (test|star_expr))* [','] ) */
2120     expr_ty elt;
2121     asdl_seq *comps;
2122     node *ch;
2123 
2124     assert(NCH(n) > 1);
2125 
2126     ch = CHILD(n, 0);
2127     elt = ast_for_expr(c, ch);
2128     if (!elt)
2129         return NULL;
2130     if (elt->kind == Starred_kind) {
2131         ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2132         return NULL;
2133     }
2134 
2135     comps = ast_for_comprehension(c, CHILD(n, 1));
2136     if (!comps)
2137         return NULL;
2138 
2139     if (type == COMP_GENEXP)
2140         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2141                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2142     else if (type == COMP_LISTCOMP)
2143         return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2144                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2145     else if (type == COMP_SETCOMP)
2146         return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2147                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2148     else
2149         /* Should never happen */
2150         return NULL;
2151 }
2152 
2153 /* Fills in the key, value pair corresponding to the dict element.  In case
2154  * of an unpacking, key is NULL.  *i is advanced by the number of ast
2155  * elements.  Iff successful, nonzero is returned.
2156  */
2157 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2158 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2159                     expr_ty *key, expr_ty *value)
2160 {
2161     expr_ty expression;
2162     if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2163         assert(NCH(n) - *i >= 2);
2164 
2165         expression = ast_for_expr(c, CHILD(n, *i + 1));
2166         if (!expression)
2167             return 0;
2168         *key = NULL;
2169         *value = expression;
2170 
2171         *i += 2;
2172     }
2173     else {
2174         assert(NCH(n) - *i >= 3);
2175 
2176         expression = ast_for_expr(c, CHILD(n, *i));
2177         if (!expression)
2178             return 0;
2179         *key = expression;
2180 
2181         REQ(CHILD(n, *i + 1), COLON);
2182 
2183         expression = ast_for_expr(c, CHILD(n, *i + 2));
2184         if (!expression)
2185             return 0;
2186         *value = expression;
2187 
2188         *i += 3;
2189     }
2190     return 1;
2191 }
2192 
2193 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2194 ast_for_dictcomp(struct compiling *c, const node *n)
2195 {
2196     expr_ty key, value;
2197     asdl_seq *comps;
2198     int i = 0;
2199 
2200     if (!ast_for_dictelement(c, n, &i, &key, &value))
2201         return NULL;
2202     assert(key);
2203     assert(NCH(n) - i >= 1);
2204 
2205     comps = ast_for_comprehension(c, CHILD(n, i));
2206     if (!comps)
2207         return NULL;
2208 
2209     return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2210                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2211 }
2212 
2213 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2214 ast_for_dictdisplay(struct compiling *c, const node *n)
2215 {
2216     int i;
2217     int j;
2218     int size;
2219     asdl_seq *keys, *values;
2220 
2221     size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2222     keys = _Py_asdl_seq_new(size, c->c_arena);
2223     if (!keys)
2224         return NULL;
2225 
2226     values = _Py_asdl_seq_new(size, c->c_arena);
2227     if (!values)
2228         return NULL;
2229 
2230     j = 0;
2231     for (i = 0; i < NCH(n); i++) {
2232         expr_ty key, value;
2233 
2234         if (!ast_for_dictelement(c, n, &i, &key, &value))
2235             return NULL;
2236         asdl_seq_SET(keys, j, key);
2237         asdl_seq_SET(values, j, value);
2238 
2239         j++;
2240     }
2241     keys->size = j;
2242     values->size = j;
2243     return Dict(keys, values, LINENO(n), n->n_col_offset,
2244                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2245 }
2246 
2247 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2248 ast_for_genexp(struct compiling *c, const node *n)
2249 {
2250     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2251     return ast_for_itercomp(c, n, COMP_GENEXP);
2252 }
2253 
2254 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2255 ast_for_listcomp(struct compiling *c, const node *n)
2256 {
2257     assert(TYPE(n) == (testlist_comp));
2258     return ast_for_itercomp(c, n, COMP_LISTCOMP);
2259 }
2260 
2261 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2262 ast_for_setcomp(struct compiling *c, const node *n)
2263 {
2264     assert(TYPE(n) == (dictorsetmaker));
2265     return ast_for_itercomp(c, n, COMP_SETCOMP);
2266 }
2267 
2268 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2269 ast_for_setdisplay(struct compiling *c, const node *n)
2270 {
2271     int i;
2272     int size;
2273     asdl_seq *elts;
2274 
2275     assert(TYPE(n) == (dictorsetmaker));
2276     size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2277     elts = _Py_asdl_seq_new(size, c->c_arena);
2278     if (!elts)
2279         return NULL;
2280     for (i = 0; i < NCH(n); i += 2) {
2281         expr_ty expression;
2282         expression = ast_for_expr(c, CHILD(n, i));
2283         if (!expression)
2284             return NULL;
2285         asdl_seq_SET(elts, i / 2, expression);
2286     }
2287     return Set(elts, LINENO(n), n->n_col_offset,
2288                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2289 }
2290 
static expr_ty
ast_for_atom(struct compiling *c, const node *n)
{
    /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
       | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
       | '...' | 'None' | 'True' | 'False'

       Convert an atom node to an expression, dispatching on the type of
       its first child.  Returns NULL on failure.
    */
    node *ch = CHILD(n, 0);

    switch (TYPE(ch)) {
    case NAME: {
        PyObject *name;
        const char *s = STR(ch);
        size_t len = strlen(s);
        /* Cheap length pre-filter: "None" and "True" have four characters,
           "False" has five, so other lengths skip the comparisons. */
        if (len >= 4 && len <= 5) {
            if (!strcmp(s, "None"))
                return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            if (!strcmp(s, "True"))
                return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            if (!strcmp(s, "False"))
                return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        name = new_identifier(s, c);
        if (!name)
            return NULL;
        /* All names start in Load context, but may later be changed. */
        return Name(name, Load, LINENO(n), n->n_col_offset,
                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    case STRING: {
        /* The whole atom (not just the first child) is passed on, since the
           grammar allows several adjacent STRING tokens. */
        expr_ty str = parsestrplus(c, n);
        if (!str) {
            /* For UnicodeError and ValueError, re-report the failure
               through ast_error(), prefixing the original message with the
               error category.  Any other exception is left as it is. */
            const char *errtype = NULL;
            if (PyErr_ExceptionMatches(PyExc_UnicodeError))
                errtype = "unicode error";
            else if (PyErr_ExceptionMatches(PyExc_ValueError))
                errtype = "value error";
            if (errtype) {
                PyObject *type, *value, *tback, *errstr;
                PyErr_Fetch(&type, &value, &tback);
                errstr = PyObject_Str(value);
                if (errstr) {
                    ast_error(c, n, "(%s) %U", errtype, errstr);
                    Py_DECREF(errstr);
                }
                else {
                    /* The message could not be stringified; discard that
                       secondary error and report a generic message. */
                    PyErr_Clear();
                    ast_error(c, n, "(%s) unknown error", errtype);
                }
                /* Release the references obtained from PyErr_Fetch(). */
                Py_DECREF(type);
                Py_XDECREF(value);
                Py_XDECREF(tback);
            }
            return NULL;
        }
        return str;
    }
    case NUMBER: {
        PyObject *pynum;
        /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
        /* Check for underscores here rather than in parse_number so we can report a line number on error */
        if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
            ast_error(c, ch,
                      "Underscores in numeric literals are only supported in Python 3.6 and greater");
            return NULL;
        }
        pynum = parsenumber(c, STR(ch));
        if (!pynum)
            return NULL;

        /* Register the number with the arena; if that fails, drop our
           reference before bailing out. */
        if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
            Py_DECREF(pynum);
            return NULL;
        }
        return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    case ELLIPSIS: /* Ellipsis */
        return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    case LPAR: /* some parenthesized expressions */
        ch = CHILD(n, 1);

        /* "()" is the empty tuple. */
        if (TYPE(ch) == RPAR)
            return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);

        if (TYPE(ch) == yield_expr)
            return ast_for_expr(c, ch);

        /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
        if (NCH(ch) == 1) {
            return ast_for_testlist(c, ch);
        }

        /* For the remaining forms the result's location is copied from the
           whole atom n. */
        if (TYPE(CHILD(ch, 1)) == comp_for) {
            return copy_location(ast_for_genexp(c, ch), n, n);
        }
        else {
            return copy_location(ast_for_testlist(c, ch), n, n);
        }
    case LSQB: /* list (or list comprehension) */
        ch = CHILD(n, 1);

        /* "[]" is the empty list. */
        if (TYPE(ch) == RSQB)
            return List(NULL, Load, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);

        REQ(ch, testlist_comp);
        /* A single element, or a comma after the first element, means a
           list display; anything else is treated as a comprehension. */
        if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
            asdl_seq *elts = seq_for_testlist(c, ch);
            if (!elts)
                return NULL;

            return List(elts, Load, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        else {
            return copy_location(ast_for_listcomp(c, ch), n, n);
        }
    case LBRACE: {
        /* dictorsetmaker: ( ((test ':' test | '**' test)
         *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
         *                   ((test | '*' test)
         *                    (comp_for | (',' (test | '*' test))* [','])) ) */
        expr_ty res;
        ch = CHILD(n, 1);
        if (TYPE(ch) == RBRACE) {
            /* It's an empty dict. */
            return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        else {
            /* is_dict is 1 when the maker starts with '**'.  It shifts the
               index at which a dict comprehension's comp_for is looked for
               from 3 (after 'key : value') to 2 (after '** expr'). */
            int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
            if (NCH(ch) == 1 ||
                    (NCH(ch) > 1 &&
                     TYPE(CHILD(ch, 1)) == COMMA)) {
                /* It's a set display. */
                res = ast_for_setdisplay(c, ch);
            }
            else if (NCH(ch) > 1 &&
                    TYPE(CHILD(ch, 1)) == comp_for) {
                /* It's a set comprehension. */
                res = ast_for_setcomp(c, ch);
            }
            else if (NCH(ch) > 3 - is_dict &&
                    TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
                /* It's a dictionary comprehension. */
                if (is_dict) {
                    ast_error(c, n,
                              "dict unpacking cannot be used in dict comprehension");
                    return NULL;
                }
                res = ast_for_dictcomp(c, ch);
            }
            else {
                /* It's a dictionary display. */
                res = ast_for_dictdisplay(c, ch);
            }
            /* The result takes its location from the whole atom n. */
            return copy_location(res, n, n);
        }
    }
    default:
        PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
        return NULL;
    }
}
2461 
2462 static expr_ty
ast_for_slice(struct compiling * c,const node * n)2463 ast_for_slice(struct compiling *c, const node *n)
2464 {
2465     node *ch;
2466     expr_ty lower = NULL, upper = NULL, step = NULL;
2467 
2468     REQ(n, subscript);
2469 
2470     /*
2471        subscript: test | [test] ':' [test] [sliceop]
2472        sliceop: ':' [test]
2473     */
2474     ch = CHILD(n, 0);
2475     if (NCH(n) == 1 && TYPE(ch) == test) {
2476         return ast_for_expr(c, ch);
2477     }
2478 
2479     if (TYPE(ch) == test) {
2480         lower = ast_for_expr(c, ch);
2481         if (!lower)
2482             return NULL;
2483     }
2484 
2485     /* If there's an upper bound it's in the second or third position. */
2486     if (TYPE(ch) == COLON) {
2487         if (NCH(n) > 1) {
2488             node *n2 = CHILD(n, 1);
2489 
2490             if (TYPE(n2) == test) {
2491                 upper = ast_for_expr(c, n2);
2492                 if (!upper)
2493                     return NULL;
2494             }
2495         }
2496     } else if (NCH(n) > 2) {
2497         node *n2 = CHILD(n, 2);
2498 
2499         if (TYPE(n2) == test) {
2500             upper = ast_for_expr(c, n2);
2501             if (!upper)
2502                 return NULL;
2503         }
2504     }
2505 
2506     ch = CHILD(n, NCH(n) - 1);
2507     if (TYPE(ch) == sliceop) {
2508         if (NCH(ch) != 1) {
2509             ch = CHILD(ch, 1);
2510             if (TYPE(ch) == test) {
2511                 step = ast_for_expr(c, ch);
2512                 if (!step)
2513                     return NULL;
2514             }
2515         }
2516     }
2517 
2518     return Slice(lower, upper, step, LINENO(n), n->n_col_offset,
2519                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2520 }
2521 
2522 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2523 ast_for_binop(struct compiling *c, const node *n)
2524 {
2525     /* Must account for a sequence of expressions.
2526        How should A op B op C by represented?
2527        BinOp(BinOp(A, op, B), op, C).
2528     */
2529 
2530     int i, nops;
2531     expr_ty expr1, expr2, result;
2532     operator_ty newoperator;
2533 
2534     expr1 = ast_for_expr(c, CHILD(n, 0));
2535     if (!expr1)
2536         return NULL;
2537 
2538     expr2 = ast_for_expr(c, CHILD(n, 2));
2539     if (!expr2)
2540         return NULL;
2541 
2542     newoperator = get_operator(c, CHILD(n, 1));
2543     if (!newoperator)
2544         return NULL;
2545 
2546     result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2547                    CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2548                    c->c_arena);
2549     if (!result)
2550         return NULL;
2551 
2552     nops = (NCH(n) - 1) / 2;
2553     for (i = 1; i < nops; i++) {
2554         expr_ty tmp_result, tmp;
2555         const node* next_oper = CHILD(n, i * 2 + 1);
2556 
2557         newoperator = get_operator(c, next_oper);
2558         if (!newoperator)
2559             return NULL;
2560 
2561         tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2562         if (!tmp)
2563             return NULL;
2564 
2565         tmp_result = BinOp(result, newoperator, tmp,
2566                            LINENO(n), n->n_col_offset,
2567                            CHILD(n, i * 2 + 2)->n_end_lineno,
2568                            CHILD(n, i * 2 + 2)->n_end_col_offset,
2569                            c->c_arena);
2570         if (!tmp_result)
2571             return NULL;
2572         result = tmp_result;
2573     }
2574     return result;
2575 }
2576 
2577 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2578 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2579 {
2580     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2581        subscriptlist: subscript (',' subscript)* [',']
2582        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2583      */
2584     const node *n_copy = n;
2585     REQ(n, trailer);
2586     if (TYPE(CHILD(n, 0)) == LPAR) {
2587         if (NCH(n) == 2)
2588             return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2589                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2590         else
2591             return ast_for_call(c, CHILD(n, 1), left_expr,
2592                                 start, CHILD(n, 0), CHILD(n, 2));
2593     }
2594     else if (TYPE(CHILD(n, 0)) == DOT) {
2595         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2596         if (!attr_id)
2597             return NULL;
2598         return Attribute(left_expr, attr_id, Load,
2599                          LINENO(start), start->n_col_offset,
2600                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2601     }
2602     else {
2603         REQ(CHILD(n, 0), LSQB);
2604         REQ(CHILD(n, 2), RSQB);
2605         n = CHILD(n, 1);
2606         if (NCH(n) == 1) {
2607             expr_ty slc = ast_for_slice(c, CHILD(n, 0));
2608             if (!slc)
2609                 return NULL;
2610             return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2611                              n_copy->n_end_lineno, n_copy->n_end_col_offset,
2612                              c->c_arena);
2613         }
2614         else {
2615             int j;
2616             expr_ty slc, e;
2617             asdl_seq *elts;
2618             elts = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2619             if (!elts)
2620                 return NULL;
2621             for (j = 0; j < NCH(n); j += 2) {
2622                 slc = ast_for_slice(c, CHILD(n, j));
2623                 if (!slc)
2624                     return NULL;
2625                 asdl_seq_SET(elts, j / 2, slc);
2626             }
2627             e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2628                       n->n_end_lineno, n->n_end_col_offset,
2629                       c->c_arena);
2630             if (!e)
2631                 return NULL;
2632             return Subscript(left_expr, e,
2633                              Load, LINENO(start), start->n_col_offset,
2634                              n_copy->n_end_lineno, n_copy->n_end_col_offset,
2635                              c->c_arena);
2636         }
2637     }
2638 }
2639 
2640 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2641 ast_for_factor(struct compiling *c, const node *n)
2642 {
2643     expr_ty expression;
2644 
2645     expression = ast_for_expr(c, CHILD(n, 1));
2646     if (!expression)
2647         return NULL;
2648 
2649     switch (TYPE(CHILD(n, 0))) {
2650         case PLUS:
2651             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2652                            n->n_end_lineno, n->n_end_col_offset,
2653                            c->c_arena);
2654         case MINUS:
2655             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2656                            n->n_end_lineno, n->n_end_col_offset,
2657                            c->c_arena);
2658         case TILDE:
2659             return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2660                            n->n_end_lineno, n->n_end_col_offset,
2661                            c->c_arena);
2662     }
2663     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2664                  TYPE(CHILD(n, 0)));
2665     return NULL;
2666 }
2667 
2668 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2669 ast_for_atom_expr(struct compiling *c, const node *n)
2670 {
2671     int i, nch, start = 0;
2672     expr_ty e;
2673 
2674     REQ(n, atom_expr);
2675     nch = NCH(n);
2676 
2677     if (TYPE(CHILD(n, 0)) == AWAIT) {
2678         if (c->c_feature_version < 5) {
2679             ast_error(c, n,
2680                       "Await expressions are only supported in Python 3.5 and greater");
2681             return NULL;
2682         }
2683         start = 1;
2684         assert(nch > 1);
2685     }
2686 
2687     e = ast_for_atom(c, CHILD(n, start));
2688     if (!e)
2689         return NULL;
2690     if (nch == 1)
2691         return e;
2692     if (start && nch == 2) {
2693         return Await(e, LINENO(n), n->n_col_offset,
2694                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2695     }
2696 
2697     for (i = start + 1; i < nch; i++) {
2698         node *ch = CHILD(n, i);
2699         if (TYPE(ch) != trailer)
2700             break;
2701         e = ast_for_trailer(c, ch, e, CHILD(n, start));
2702         if (!e)
2703             return NULL;
2704     }
2705 
2706     if (start) {
2707         /* there was an 'await' */
2708         return Await(e, LINENO(n), n->n_col_offset,
2709                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2710     }
2711     else {
2712         return e;
2713     }
2714 }
2715 
2716 static expr_ty
ast_for_power(struct compiling * c,const node * n)2717 ast_for_power(struct compiling *c, const node *n)
2718 {
2719     /* power: atom trailer* ('**' factor)*
2720      */
2721     expr_ty e;
2722     REQ(n, power);
2723     e = ast_for_atom_expr(c, CHILD(n, 0));
2724     if (!e)
2725         return NULL;
2726     if (NCH(n) == 1)
2727         return e;
2728     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2729         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2730         if (!f)
2731             return NULL;
2732         e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2733                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2734     }
2735     return e;
2736 }
2737 
2738 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2739 ast_for_starred(struct compiling *c, const node *n)
2740 {
2741     expr_ty tmp;
2742     REQ(n, star_expr);
2743 
2744     tmp = ast_for_expr(c, CHILD(n, 1));
2745     if (!tmp)
2746         return NULL;
2747 
2748     /* The Load context is changed later. */
2749     return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2750                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2751 }
2752 
2753 
2754 /* Do not name a variable 'expr'!  Will cause a compile error.
2755 */
2756 
static expr_ty
ast_for_expr(struct compiling *c, const node *n)
{
    /* Convert a CST expression node into an AST expression node.

       handle the full range of simple expressions
       namedexpr_test: test [':=' test]
       test: or_test ['if' or_test 'else' test] | lambdef
       test_nocond: or_test | lambdef_nocond
       or_test: and_test ('or' and_test)*
       and_test: not_test ('and' not_test)*
       not_test: 'not' not_test | comparison
       comparison: expr (comp_op expr)*
       expr: xor_expr ('|' xor_expr)*
       xor_expr: and_expr ('^' and_expr)*
       and_expr: shift_expr ('&' shift_expr)*
       shift_expr: arith_expr (('<<'|'>>') arith_expr)*
       arith_expr: term (('+'|'-') term)*
       term: factor (('*'|'@'|'/'|'%'|'//') factor)*
       factor: ('+'|'-'|'~') factor | power
       power: atom_expr ['**' factor]
       atom_expr: [AWAIT] atom trailer*
       yield_expr: 'yield' [yield_arg]

       A node with a single child is not converted itself: n is replaced by
       that child and the switch is entered again through the 'loop' label.
       Several cases deliberately fall through to the next one.

       Returns NULL on failure.  A node type that is not handled sets
       SystemError.
    */

    asdl_seq *seq;
    int i;

 loop:
    switch (TYPE(n)) {
        case namedexpr_test:
            /* Three children means 'test := test'. */
            if (NCH(n) == 3)
                return ast_for_namedexpr(c, n);
            /* Fallthrough */
        case test:
        case test_nocond:
            if (TYPE(CHILD(n, 0)) == lambdef ||
                TYPE(CHILD(n, 0)) == lambdef_nocond)
                return ast_for_lambdef(c, CHILD(n, 0));
            else if (NCH(n) > 1)
                return ast_for_ifexpr(c, n);
            /* Fallthrough */
        case or_test:
        case and_test:
            if (NCH(n) == 1) {
                /* Nothing to build at this level: descend into the child. */
                n = CHILD(n, 0);
                goto loop;
            }
            /* The operands are the children at even indices; the children
               in between are the 'and'/'or' keywords. */
            seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
            if (!seq)
                return NULL;
            for (i = 0; i < NCH(n); i += 2) {
                expr_ty e = ast_for_expr(c, CHILD(n, i));
                if (!e)
                    return NULL;
                asdl_seq_SET(seq, i / 2, e);
            }
            /* Child 1 is the first keyword and selects the operator. */
            if (!strcmp(STR(CHILD(n, 1)), "and"))
                return BoolOp(And, seq, LINENO(n), n->n_col_offset,
                              n->n_end_lineno, n->n_end_col_offset,
                              c->c_arena);
            assert(!strcmp(STR(CHILD(n, 1)), "or"));
            return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        case not_test:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                /* 'not' not_test: the operand is child 1. */
                expr_ty expression = ast_for_expr(c, CHILD(n, 1));
                if (!expression)
                    return NULL;

                return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
                               n->n_end_lineno, n->n_end_col_offset,
                               c->c_arena);
            }
        case comparison:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                /* ops and cmps hold one entry per (comp_op, expr) pair that
                   follows the first operand. */
                expr_ty expression;
                asdl_int_seq *ops;
                asdl_seq *cmps;
                ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
                if (!ops)
                    return NULL;
                cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
                if (!cmps) {
                    return NULL;
                }
                for (i = 1; i < NCH(n); i += 2) {
                    cmpop_ty newoperator;

                    newoperator = ast_for_comp_op(c, CHILD(n, i));
                    if (!newoperator) {
                        return NULL;
                    }

                    expression = ast_for_expr(c, CHILD(n, i + 1));
                    if (!expression) {
                        return NULL;
                    }

                    asdl_seq_SET(ops, i / 2, newoperator);
                    asdl_seq_SET(cmps, i / 2, expression);
                }
                /* The leftmost operand (child 0) is converted last. */
                expression = ast_for_expr(c, CHILD(n, 0));
                if (!expression) {
                    return NULL;
                }

                return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            }

        case star_expr:
            return ast_for_starred(c, n);
        /* The next six cases all handle BinOps.  The main body of code
           is the same in each case, but the switch turned inside out to
           reuse the code for each type of operator.
         */
        case expr:
        case xor_expr:
        case and_expr:
        case shift_expr:
        case arith_expr:
        case term:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_binop(c, n);
        case yield_expr: {
            node *an = NULL;
            node *en = NULL;
            int is_from = 0;
            expr_ty exp = NULL;
            if (NCH(n) > 1)
                an = CHILD(n, 1); /* yield_arg */
            if (an) {
                /* The yielded expression is the last child of yield_arg; a
                   yield_arg with two children is treated as 'yield from'. */
                en = CHILD(an, NCH(an) - 1);
                if (NCH(an) == 2) {
                    is_from = 1;
                    exp = ast_for_expr(c, en);
                }
                else
                    exp = ast_for_testlist(c, en);
                if (!exp)
                    return NULL;
            }
            if (is_from)
                return YieldFrom(exp, LINENO(n), n->n_col_offset,
                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            /* exp is still NULL here for a bare 'yield'. */
            return Yield(exp, LINENO(n), n->n_col_offset,
                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        case factor:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_factor(c, n);
        case power:
            return ast_for_power(c, n);
        default:
            PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
            return NULL;
    }
    /* should never get here unless an error is set */
    return NULL;
}
2930 
2931 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)2932 ast_for_call(struct compiling *c, const node *n, expr_ty func,
2933              const node *start, const node *maybegenbeg, const node *closepar)
2934 {
2935     /*
2936       arglist: argument (',' argument)*  [',']
2937       argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
2938     */
2939 
2940     int i, nargs, nkeywords;
2941     int ndoublestars;
2942     asdl_seq *args;
2943     asdl_seq *keywords;
2944 
2945     REQ(n, arglist);
2946 
2947     nargs = 0;
2948     nkeywords = 0;
2949     for (i = 0; i < NCH(n); i++) {
2950         node *ch = CHILD(n, i);
2951         if (TYPE(ch) == argument) {
2952             if (NCH(ch) == 1)
2953                 nargs++;
2954             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2955                 nargs++;
2956                 if (!maybegenbeg) {
2957                     ast_error(c, ch, "invalid syntax");
2958                     return NULL;
2959                 }
2960                 if (NCH(n) > 1) {
2961                     ast_error(c, ch, "Generator expression must be parenthesized");
2962                     return NULL;
2963                 }
2964             }
2965             else if (TYPE(CHILD(ch, 0)) == STAR)
2966                 nargs++;
2967             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
2968                 nargs++;
2969             }
2970             else
2971                 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
2972                 nkeywords++;
2973         }
2974     }
2975 
2976     args = _Py_asdl_seq_new(nargs, c->c_arena);
2977     if (!args)
2978         return NULL;
2979     keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
2980     if (!keywords)
2981         return NULL;
2982 
2983     nargs = 0;  /* positional arguments + iterable argument unpackings */
2984     nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
2985     ndoublestars = 0;  /* just keyword argument unpackings */
2986     for (i = 0; i < NCH(n); i++) {
2987         node *ch = CHILD(n, i);
2988         if (TYPE(ch) == argument) {
2989             expr_ty e;
2990             node *chch = CHILD(ch, 0);
2991             if (NCH(ch) == 1) {
2992                 /* a positional argument */
2993                 if (nkeywords) {
2994                     if (ndoublestars) {
2995                         ast_error(c, chch,
2996                                   "positional argument follows "
2997                                   "keyword argument unpacking");
2998                     }
2999                     else {
3000                         ast_error(c, chch,
3001                                   "positional argument follows "
3002                                   "keyword argument");
3003                     }
3004                     return NULL;
3005                 }
3006                 e = ast_for_expr(c, chch);
3007                 if (!e)
3008                     return NULL;
3009                 asdl_seq_SET(args, nargs++, e);
3010             }
3011             else if (TYPE(chch) == STAR) {
3012                 /* an iterable argument unpacking */
3013                 expr_ty starred;
3014                 if (ndoublestars) {
3015                     ast_error(c, chch,
3016                               "iterable argument unpacking follows "
3017                               "keyword argument unpacking");
3018                     return NULL;
3019                 }
3020                 e = ast_for_expr(c, CHILD(ch, 1));
3021                 if (!e)
3022                     return NULL;
3023                 starred = Starred(e, Load, LINENO(chch),
3024                         chch->n_col_offset,
3025                         e->end_lineno, e->end_col_offset,
3026                         c->c_arena);
3027                 if (!starred)
3028                     return NULL;
3029                 asdl_seq_SET(args, nargs++, starred);
3030 
3031             }
3032             else if (TYPE(chch) == DOUBLESTAR) {
3033                 /* a keyword argument unpacking */
3034                 keyword_ty kw;
3035                 i++;
3036                 e = ast_for_expr(c, CHILD(ch, 1));
3037                 if (!e)
3038                     return NULL;
3039                 kw = keyword(NULL, e, chch->n_lineno, chch->n_col_offset,
3040                              e->end_lineno, e->end_col_offset, c->c_arena);
3041                 asdl_seq_SET(keywords, nkeywords++, kw);
3042                 ndoublestars++;
3043             }
3044             else if (TYPE(CHILD(ch, 1)) == comp_for) {
3045                 /* the lone generator expression */
3046                 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3047                 if (!e)
3048                     return NULL;
3049                 asdl_seq_SET(args, nargs++, e);
3050             }
3051             else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3052                 /* treat colon equal as positional argument */
3053                 if (nkeywords) {
3054                     if (ndoublestars) {
3055                         ast_error(c, chch,
3056                                   "positional argument follows "
3057                                   "keyword argument unpacking");
3058                     }
3059                     else {
3060                         ast_error(c, chch,
3061                                   "positional argument follows "
3062                                   "keyword argument");
3063                     }
3064                     return NULL;
3065                 }
3066                 e = ast_for_namedexpr(c, ch);
3067                 if (!e)
3068                     return NULL;
3069                 asdl_seq_SET(args, nargs++, e);
3070             }
3071             else {
3072                 /* a keyword argument */
3073                 keyword_ty kw;
3074                 identifier key;
3075 
3076                 // To remain LL(1), the grammar accepts any test (basically, any
3077                 // expression) in the keyword slot of a call site.  So, we need
3078                 // to manually enforce that the keyword is a NAME here.
3079                 static const int name_tree[] = {
3080                     test,
3081                     or_test,
3082                     and_test,
3083                     not_test,
3084                     comparison,
3085                     expr,
3086                     xor_expr,
3087                     and_expr,
3088                     shift_expr,
3089                     arith_expr,
3090                     term,
3091                     factor,
3092                     power,
3093                     atom_expr,
3094                     atom,
3095                     0,
3096                 };
3097                 node *expr_node = chch;
3098                 for (int i = 0; name_tree[i]; i++) {
3099                     if (TYPE(expr_node) != name_tree[i])
3100                         break;
3101                     if (NCH(expr_node) != 1)
3102                         break;
3103                     expr_node = CHILD(expr_node, 0);
3104                 }
3105                 if (TYPE(expr_node) != NAME) {
3106                     ast_error(c, chch,
3107                               "expression cannot contain assignment, "
3108                               "perhaps you meant \"==\"?");
3109                     return NULL;
3110                 }
3111                 key = new_identifier(STR(expr_node), c);
3112                 if (key == NULL) {
3113                     return NULL;
3114                 }
3115                 if (forbidden_name(c, key, chch, 1)) {
3116                     return NULL;
3117                 }
3118                 e = ast_for_expr(c, CHILD(ch, 2));
3119                 if (!e)
3120                     return NULL;
3121                 kw = keyword(key, e, chch->n_lineno, chch->n_col_offset,
3122                              e->end_lineno, e->end_col_offset, c->c_arena);
3123 
3124                 if (!kw)
3125                     return NULL;
3126                 asdl_seq_SET(keywords, nkeywords++, kw);
3127             }
3128         }
3129     }
3130 
3131     return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3132                 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3133 }
3134 
3135 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3136 ast_for_testlist(struct compiling *c, const node* n)
3137 {
3138     /* testlist_comp: test (comp_for | (',' test)* [',']) */
3139     /* testlist: test (',' test)* [','] */
3140     assert(NCH(n) > 0);
3141     if (TYPE(n) == testlist_comp) {
3142         if (NCH(n) > 1)
3143             assert(TYPE(CHILD(n, 1)) != comp_for);
3144     }
3145     else {
3146         assert(TYPE(n) == testlist ||
3147                TYPE(n) == testlist_star_expr);
3148     }
3149     if (NCH(n) == 1)
3150         return ast_for_expr(c, CHILD(n, 0));
3151     else {
3152         asdl_seq *tmp = seq_for_testlist(c, n);
3153         if (!tmp)
3154             return NULL;
3155         return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3156                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3157     }
3158 }
3159 
3160 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3161 ast_for_expr_stmt(struct compiling *c, const node *n)
3162 {
3163     REQ(n, expr_stmt);
3164     /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3165                      [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3166        annassign: ':' test ['=' (yield_expr|testlist)]
3167        testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3168        augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3169                    '<<=' | '>>=' | '**=' | '//=')
3170        test: ... here starts the operator precedence dance
3171      */
3172     int num = NCH(n);
3173 
3174     if (num == 1) {
3175         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3176         if (!e)
3177             return NULL;
3178 
3179         return Expr(e, LINENO(n), n->n_col_offset,
3180                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3181     }
3182     else if (TYPE(CHILD(n, 1)) == augassign) {
3183         expr_ty expr1, expr2;
3184         operator_ty newoperator;
3185         node *ch = CHILD(n, 0);
3186 
3187         expr1 = ast_for_testlist(c, ch);
3188         if (!expr1)
3189             return NULL;
3190         /* Augmented assignments can only have a name, a subscript, or an
3191           attribute on the left, though, so we have to explicitly check for
3192           those. */
3193         switch (expr1->kind) {
3194             case Name_kind:
3195             case Attribute_kind:
3196             case Subscript_kind:
3197                 break;
3198             default:
3199                 ast_error(c, ch, "'%s' is an illegal expression for augmented assignment",
3200                           get_expr_name(expr1));
3201                 return NULL;
3202         }
3203 
3204         /* set_context checks that most expressions are not the left side. */
3205         if(!set_context(c, expr1, Store, ch)) {
3206             return NULL;
3207         }
3208 
3209         ch = CHILD(n, 2);
3210         if (TYPE(ch) == testlist)
3211             expr2 = ast_for_testlist(c, ch);
3212         else
3213             expr2 = ast_for_expr(c, ch);
3214         if (!expr2)
3215             return NULL;
3216 
3217         newoperator = ast_for_augassign(c, CHILD(n, 1));
3218         if (!newoperator)
3219             return NULL;
3220 
3221         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3222                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3223     }
3224     else if (TYPE(CHILD(n, 1)) == annassign) {
3225         expr_ty expr1, expr2, expr3;
3226         node *ch = CHILD(n, 0);
3227         node *deep, *ann = CHILD(n, 1);
3228         int simple = 1;
3229 
3230         /* AnnAssigns are only allowed in Python 3.6 or greater */
3231         if (c->c_feature_version < 6) {
3232             ast_error(c, ch,
3233                       "Variable annotation syntax is only supported in Python 3.6 and greater");
3234             return NULL;
3235         }
3236 
3237         /* we keep track of parens to qualify (x) as expression not name */
3238         deep = ch;
3239         while (NCH(deep) == 1) {
3240             deep = CHILD(deep, 0);
3241         }
3242         if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3243             simple = 0;
3244         }
3245         expr1 = ast_for_testlist(c, ch);
3246         if (!expr1) {
3247             return NULL;
3248         }
3249         switch (expr1->kind) {
3250             case Name_kind:
3251                 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3252                     return NULL;
3253                 }
3254                 expr1->v.Name.ctx = Store;
3255                 break;
3256             case Attribute_kind:
3257                 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3258                     return NULL;
3259                 }
3260                 expr1->v.Attribute.ctx = Store;
3261                 break;
3262             case Subscript_kind:
3263                 expr1->v.Subscript.ctx = Store;
3264                 break;
3265             case List_kind:
3266                 ast_error(c, ch,
3267                           "only single target (not list) can be annotated");
3268                 return NULL;
3269             case Tuple_kind:
3270                 ast_error(c, ch,
3271                           "only single target (not tuple) can be annotated");
3272                 return NULL;
3273             default:
3274                 ast_error(c, ch,
3275                           "illegal target for annotation");
3276                 return NULL;
3277         }
3278 
3279         if (expr1->kind != Name_kind) {
3280             simple = 0;
3281         }
3282         ch = CHILD(ann, 1);
3283         expr2 = ast_for_expr(c, ch);
3284         if (!expr2) {
3285             return NULL;
3286         }
3287         if (NCH(ann) == 2) {
3288             return AnnAssign(expr1, expr2, NULL, simple,
3289                              LINENO(n), n->n_col_offset,
3290                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3291         }
3292         else {
3293             ch = CHILD(ann, 3);
3294             if (TYPE(ch) == testlist_star_expr) {
3295                 expr3 = ast_for_testlist(c, ch);
3296             }
3297             else {
3298                 expr3 = ast_for_expr(c, ch);
3299             }
3300             if (!expr3) {
3301                 return NULL;
3302             }
3303             return AnnAssign(expr1, expr2, expr3, simple,
3304                              LINENO(n), n->n_col_offset,
3305                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3306         }
3307     }
3308     else {
3309         int i, nch_minus_type, has_type_comment;
3310         asdl_seq *targets;
3311         node *value;
3312         expr_ty expression;
3313         string type_comment;
3314 
3315         /* a normal assignment */
3316         REQ(CHILD(n, 1), EQUAL);
3317 
3318         has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3319         nch_minus_type = num - has_type_comment;
3320 
3321         targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3322         if (!targets)
3323             return NULL;
3324         for (i = 0; i < nch_minus_type - 2; i += 2) {
3325             expr_ty e;
3326             node *ch = CHILD(n, i);
3327             if (TYPE(ch) == yield_expr) {
3328                 ast_error(c, ch, "assignment to yield expression not possible");
3329                 return NULL;
3330             }
3331             e = ast_for_testlist(c, ch);
3332             if (!e)
3333               return NULL;
3334 
3335             /* set context to assign */
3336             if (!set_context(c, e, Store, CHILD(n, i)))
3337               return NULL;
3338 
3339             asdl_seq_SET(targets, i / 2, e);
3340         }
3341         value = CHILD(n, nch_minus_type - 1);
3342         if (TYPE(value) == testlist_star_expr)
3343             expression = ast_for_testlist(c, value);
3344         else
3345             expression = ast_for_expr(c, value);
3346         if (!expression)
3347             return NULL;
3348         if (has_type_comment) {
3349             type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3350             if (!type_comment)
3351                 return NULL;
3352         }
3353         else
3354             type_comment = NULL;
3355         return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3356                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3357     }
3358 }
3359 
3360 
3361 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3362 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3363 {
3364     asdl_seq *seq;
3365     int i;
3366     expr_ty e;
3367 
3368     REQ(n, exprlist);
3369 
3370     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3371     if (!seq)
3372         return NULL;
3373     for (i = 0; i < NCH(n); i += 2) {
3374         e = ast_for_expr(c, CHILD(n, i));
3375         if (!e)
3376             return NULL;
3377         asdl_seq_SET(seq, i / 2, e);
3378         if (context && !set_context(c, e, context, CHILD(n, i)))
3379             return NULL;
3380     }
3381     return seq;
3382 }
3383 
3384 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3385 ast_for_del_stmt(struct compiling *c, const node *n)
3386 {
3387     asdl_seq *expr_list;
3388 
3389     /* del_stmt: 'del' exprlist */
3390     REQ(n, del_stmt);
3391 
3392     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3393     if (!expr_list)
3394         return NULL;
3395     return Delete(expr_list, LINENO(n), n->n_col_offset,
3396                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3397 }
3398 
3399 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3400 ast_for_flow_stmt(struct compiling *c, const node *n)
3401 {
3402     /*
3403       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3404                  | yield_stmt
3405       break_stmt: 'break'
3406       continue_stmt: 'continue'
3407       return_stmt: 'return' [testlist]
3408       yield_stmt: yield_expr
3409       yield_expr: 'yield' testlist | 'yield' 'from' test
3410       raise_stmt: 'raise' [test [',' test [',' test]]]
3411     */
3412     node *ch;
3413 
3414     REQ(n, flow_stmt);
3415     ch = CHILD(n, 0);
3416     switch (TYPE(ch)) {
3417         case break_stmt:
3418             return Break(LINENO(n), n->n_col_offset,
3419                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3420         case continue_stmt:
3421             return Continue(LINENO(n), n->n_col_offset,
3422                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3423         case yield_stmt: { /* will reduce to yield_expr */
3424             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3425             if (!exp)
3426                 return NULL;
3427             return Expr(exp, LINENO(n), n->n_col_offset,
3428                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3429         }
3430         case return_stmt:
3431             if (NCH(ch) == 1)
3432                 return Return(NULL, LINENO(n), n->n_col_offset,
3433                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3434             else {
3435                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3436                 if (!expression)
3437                     return NULL;
3438                 return Return(expression, LINENO(n), n->n_col_offset,
3439                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3440             }
3441         case raise_stmt:
3442             if (NCH(ch) == 1)
3443                 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3444                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3445             else if (NCH(ch) >= 2) {
3446                 expr_ty cause = NULL;
3447                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3448                 if (!expression)
3449                     return NULL;
3450                 if (NCH(ch) == 4) {
3451                     cause = ast_for_expr(c, CHILD(ch, 3));
3452                     if (!cause)
3453                         return NULL;
3454                 }
3455                 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3456                              n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3457             }
3458             /* fall through */
3459         default:
3460             PyErr_Format(PyExc_SystemError,
3461                          "unexpected flow_stmt: %d", TYPE(ch));
3462             return NULL;
3463     }
3464 }
3465 
3466 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3467 alias_for_import_name(struct compiling *c, const node *n, int store)
3468 {
3469     /*
3470       import_as_name: NAME ['as' NAME]
3471       dotted_as_name: dotted_name ['as' NAME]
3472       dotted_name: NAME ('.' NAME)*
3473     */
3474     identifier str, name;
3475 
3476  loop:
3477     switch (TYPE(n)) {
3478         case import_as_name: {
3479             node *name_node = CHILD(n, 0);
3480             str = NULL;
3481             name = NEW_IDENTIFIER(name_node);
3482             if (!name)
3483                 return NULL;
3484             if (NCH(n) == 3) {
3485                 node *str_node = CHILD(n, 2);
3486                 str = NEW_IDENTIFIER(str_node);
3487                 if (!str)
3488                     return NULL;
3489                 if (store && forbidden_name(c, str, str_node, 0))
3490                     return NULL;
3491             }
3492             else {
3493                 if (forbidden_name(c, name, name_node, 0))
3494                     return NULL;
3495             }
3496             return alias(name, str, c->c_arena);
3497         }
3498         case dotted_as_name:
3499             if (NCH(n) == 1) {
3500                 n = CHILD(n, 0);
3501                 goto loop;
3502             }
3503             else {
3504                 node *asname_node = CHILD(n, 2);
3505                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3506                 if (!a)
3507                     return NULL;
3508                 assert(!a->asname);
3509                 a->asname = NEW_IDENTIFIER(asname_node);
3510                 if (!a->asname)
3511                     return NULL;
3512                 if (forbidden_name(c, a->asname, asname_node, 0))
3513                     return NULL;
3514                 return a;
3515             }
3516         case dotted_name:
3517             if (NCH(n) == 1) {
3518                 node *name_node = CHILD(n, 0);
3519                 name = NEW_IDENTIFIER(name_node);
3520                 if (!name)
3521                     return NULL;
3522                 if (store && forbidden_name(c, name, name_node, 0))
3523                     return NULL;
3524                 return alias(name, NULL, c->c_arena);
3525             }
3526             else {
3527                 /* Create a string of the form "a.b.c" */
3528                 int i;
3529                 size_t len;
3530                 char *s;
3531                 PyObject *uni;
3532 
3533                 len = 0;
3534                 for (i = 0; i < NCH(n); i += 2)
3535                     /* length of string plus one for the dot */
3536                     len += strlen(STR(CHILD(n, i))) + 1;
3537                 len--; /* the last name doesn't have a dot */
3538                 str = PyBytes_FromStringAndSize(NULL, len);
3539                 if (!str)
3540                     return NULL;
3541                 s = PyBytes_AS_STRING(str);
3542                 if (!s)
3543                     return NULL;
3544                 for (i = 0; i < NCH(n); i += 2) {
3545                     char *sch = STR(CHILD(n, i));
3546                     strcpy(s, STR(CHILD(n, i)));
3547                     s += strlen(sch);
3548                     *s++ = '.';
3549                 }
3550                 --s;
3551                 *s = '\0';
3552                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3553                                            PyBytes_GET_SIZE(str),
3554                                            NULL);
3555                 Py_DECREF(str);
3556                 if (!uni)
3557                     return NULL;
3558                 str = uni;
3559                 PyUnicode_InternInPlace(&str);
3560                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3561                     Py_DECREF(str);
3562                     return NULL;
3563                 }
3564                 return alias(str, NULL, c->c_arena);
3565             }
3566         case STAR:
3567             str = PyUnicode_InternFromString("*");
3568             if (!str)
3569                 return NULL;
3570             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3571                 Py_DECREF(str);
3572                 return NULL;
3573             }
3574             return alias(str, NULL, c->c_arena);
3575         default:
3576             PyErr_Format(PyExc_SystemError,
3577                          "unexpected import name: %d", TYPE(n));
3578             return NULL;
3579     }
3580 }
3581 
3582 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3583 ast_for_import_stmt(struct compiling *c, const node *n)
3584 {
3585     /*
3586       import_stmt: import_name | import_from
3587       import_name: 'import' dotted_as_names
3588       import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3589                    'import' ('*' | '(' import_as_names ')' | import_as_names)
3590     */
3591     int lineno;
3592     int col_offset;
3593     int i;
3594     asdl_seq *aliases;
3595 
3596     REQ(n, import_stmt);
3597     lineno = LINENO(n);
3598     col_offset = n->n_col_offset;
3599     n = CHILD(n, 0);
3600     if (TYPE(n) == import_name) {
3601         n = CHILD(n, 1);
3602         REQ(n, dotted_as_names);
3603         aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3604         if (!aliases)
3605                 return NULL;
3606         for (i = 0; i < NCH(n); i += 2) {
3607             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3608             if (!import_alias)
3609                 return NULL;
3610             asdl_seq_SET(aliases, i / 2, import_alias);
3611         }
3612         // Even though n is modified above, the end position is not changed
3613         return Import(aliases, lineno, col_offset,
3614                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3615     }
3616     else if (TYPE(n) == import_from) {
3617         int n_children;
3618         int idx, ndots = 0;
3619         const node *n_copy = n;
3620         alias_ty mod = NULL;
3621         identifier modname = NULL;
3622 
3623        /* Count the number of dots (for relative imports) and check for the
3624           optional module name */
3625         for (idx = 1; idx < NCH(n); idx++) {
3626             if (TYPE(CHILD(n, idx)) == dotted_name) {
3627                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3628                 if (!mod)
3629                     return NULL;
3630                 idx++;
3631                 break;
3632             } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3633                 /* three consecutive dots are tokenized as one ELLIPSIS */
3634                 ndots += 3;
3635                 continue;
3636             } else if (TYPE(CHILD(n, idx)) != DOT) {
3637                 break;
3638             }
3639             ndots++;
3640         }
3641         idx++; /* skip over the 'import' keyword */
3642         switch (TYPE(CHILD(n, idx))) {
3643         case STAR:
3644             /* from ... import * */
3645             n = CHILD(n, idx);
3646             n_children = 1;
3647             break;
3648         case LPAR:
3649             /* from ... import (x, y, z) */
3650             n = CHILD(n, idx + 1);
3651             n_children = NCH(n);
3652             break;
3653         case import_as_names:
3654             /* from ... import x, y, z */
3655             n = CHILD(n, idx);
3656             n_children = NCH(n);
3657             if (n_children % 2 == 0) {
3658                 ast_error(c, n,
3659                           "trailing comma not allowed without"
3660                           " surrounding parentheses");
3661                 return NULL;
3662             }
3663             break;
3664         default:
3665             ast_error(c, n, "Unexpected node-type in from-import");
3666             return NULL;
3667         }
3668 
3669         aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3670         if (!aliases)
3671             return NULL;
3672 
3673         /* handle "from ... import *" special b/c there's no children */
3674         if (TYPE(n) == STAR) {
3675             alias_ty import_alias = alias_for_import_name(c, n, 1);
3676             if (!import_alias)
3677                 return NULL;
3678             asdl_seq_SET(aliases, 0, import_alias);
3679         }
3680         else {
3681             for (i = 0; i < NCH(n); i += 2) {
3682                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3683                 if (!import_alias)
3684                     return NULL;
3685                 asdl_seq_SET(aliases, i / 2, import_alias);
3686             }
3687         }
3688         if (mod != NULL)
3689             modname = mod->name;
3690         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3691                           n_copy->n_end_lineno, n_copy->n_end_col_offset,
3692                           c->c_arena);
3693     }
3694     PyErr_Format(PyExc_SystemError,
3695                  "unknown import statement: starts with command '%s'",
3696                  STR(CHILD(n, 0)));
3697     return NULL;
3698 }
3699 
3700 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3701 ast_for_global_stmt(struct compiling *c, const node *n)
3702 {
3703     /* global_stmt: 'global' NAME (',' NAME)* */
3704     identifier name;
3705     asdl_seq *s;
3706     int i;
3707 
3708     REQ(n, global_stmt);
3709     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3710     if (!s)
3711         return NULL;
3712     for (i = 1; i < NCH(n); i += 2) {
3713         name = NEW_IDENTIFIER(CHILD(n, i));
3714         if (!name)
3715             return NULL;
3716         asdl_seq_SET(s, i / 2, name);
3717     }
3718     return Global(s, LINENO(n), n->n_col_offset,
3719                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3720 }
3721 
3722 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3723 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3724 {
3725     /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3726     identifier name;
3727     asdl_seq *s;
3728     int i;
3729 
3730     REQ(n, nonlocal_stmt);
3731     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3732     if (!s)
3733         return NULL;
3734     for (i = 1; i < NCH(n); i += 2) {
3735         name = NEW_IDENTIFIER(CHILD(n, i));
3736         if (!name)
3737             return NULL;
3738         asdl_seq_SET(s, i / 2, name);
3739     }
3740     return Nonlocal(s, LINENO(n), n->n_col_offset,
3741                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3742 }
3743 
3744 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3745 ast_for_assert_stmt(struct compiling *c, const node *n)
3746 {
3747     /* assert_stmt: 'assert' test [',' test] */
3748     REQ(n, assert_stmt);
3749     if (NCH(n) == 2) {
3750         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3751         if (!expression)
3752             return NULL;
3753         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3754                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3755     }
3756     else if (NCH(n) == 4) {
3757         expr_ty expr1, expr2;
3758 
3759         expr1 = ast_for_expr(c, CHILD(n, 1));
3760         if (!expr1)
3761             return NULL;
3762         expr2 = ast_for_expr(c, CHILD(n, 3));
3763         if (!expr2)
3764             return NULL;
3765 
3766         return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3767                       n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3768     }
3769     PyErr_Format(PyExc_SystemError,
3770                  "improper number of parts to 'assert' statement: %d",
3771                  NCH(n));
3772     return NULL;
3773 }
3774 
3775 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3776 ast_for_suite(struct compiling *c, const node *n)
3777 {
3778     /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3779     asdl_seq *seq;
3780     stmt_ty s;
3781     int i, total, num, end, pos = 0;
3782     node *ch;
3783 
3784     if (TYPE(n) != func_body_suite) {
3785         REQ(n, suite);
3786     }
3787 
3788     total = num_stmts(n);
3789     seq = _Py_asdl_seq_new(total, c->c_arena);
3790     if (!seq)
3791         return NULL;
3792     if (TYPE(CHILD(n, 0)) == simple_stmt) {
3793         n = CHILD(n, 0);
3794         /* simple_stmt always ends with a NEWLINE,
3795            and may have a trailing SEMI
3796         */
3797         end = NCH(n) - 1;
3798         if (TYPE(CHILD(n, end - 1)) == SEMI)
3799             end--;
3800         /* loop by 2 to skip semi-colons */
3801         for (i = 0; i < end; i += 2) {
3802             ch = CHILD(n, i);
3803             s = ast_for_stmt(c, ch);
3804             if (!s)
3805                 return NULL;
3806             asdl_seq_SET(seq, pos++, s);
3807         }
3808     }
3809     else {
3810         i = 2;
3811         if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3812             i += 2;
3813             REQ(CHILD(n, 2), NEWLINE);
3814         }
3815 
3816         for (; i < (NCH(n) - 1); i++) {
3817             ch = CHILD(n, i);
3818             REQ(ch, stmt);
3819             num = num_stmts(ch);
3820             if (num == 1) {
3821                 /* small_stmt or compound_stmt with only one child */
3822                 s = ast_for_stmt(c, ch);
3823                 if (!s)
3824                     return NULL;
3825                 asdl_seq_SET(seq, pos++, s);
3826             }
3827             else {
3828                 int j;
3829                 ch = CHILD(ch, 0);
3830                 REQ(ch, simple_stmt);
3831                 for (j = 0; j < NCH(ch); j += 2) {
3832                     /* statement terminates with a semi-colon ';' */
3833                     if (NCH(CHILD(ch, j)) == 0) {
3834                         assert((j + 1) == NCH(ch));
3835                         break;
3836                     }
3837                     s = ast_for_stmt(c, CHILD(ch, j));
3838                     if (!s)
3839                         return NULL;
3840                     asdl_seq_SET(seq, pos++, s);
3841                 }
3842             }
3843         }
3844     }
3845     assert(pos == seq->size);
3846     return seq;
3847 }
3848 
3849 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3850 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3851 {
3852     Py_ssize_t tot = asdl_seq_LEN(s);
3853     // There must be no empty suites.
3854     assert(tot > 0);
3855     stmt_ty last = asdl_seq_GET(s, tot - 1);
3856     *end_lineno = last->end_lineno;
3857     *end_col_offset = last->end_col_offset;
3858 }
3859 
3860 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3861 ast_for_if_stmt(struct compiling *c, const node *n)
3862 {
3863     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3864        ['else' ':' suite]
3865     */
3866     char *s;
3867     int end_lineno, end_col_offset;
3868 
3869     REQ(n, if_stmt);
3870 
3871     if (NCH(n) == 4) {
3872         expr_ty expression;
3873         asdl_seq *suite_seq;
3874 
3875         expression = ast_for_expr(c, CHILD(n, 1));
3876         if (!expression)
3877             return NULL;
3878         suite_seq = ast_for_suite(c, CHILD(n, 3));
3879         if (!suite_seq)
3880             return NULL;
3881         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3882 
3883         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3884                   end_lineno, end_col_offset, c->c_arena);
3885     }
3886 
3887     s = STR(CHILD(n, 4));
3888     /* s[2], the third character in the string, will be
3889        's' for el_s_e, or
3890        'i' for el_i_f
3891     */
3892     if (s[2] == 's') {
3893         expr_ty expression;
3894         asdl_seq *seq1, *seq2;
3895 
3896         expression = ast_for_expr(c, CHILD(n, 1));
3897         if (!expression)
3898             return NULL;
3899         seq1 = ast_for_suite(c, CHILD(n, 3));
3900         if (!seq1)
3901             return NULL;
3902         seq2 = ast_for_suite(c, CHILD(n, 6));
3903         if (!seq2)
3904             return NULL;
3905         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
3906 
3907         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3908                   end_lineno, end_col_offset, c->c_arena);
3909     }
3910     else if (s[2] == 'i') {
3911         int i, n_elif, has_else = 0;
3912         expr_ty expression;
3913         asdl_seq *suite_seq;
3914         asdl_seq *orelse = NULL;
3915         n_elif = NCH(n) - 4;
3916         /* must reference the child n_elif+1 since 'else' token is third,
3917            not fourth, child from the end. */
3918         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3919             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3920             has_else = 1;
3921             n_elif -= 3;
3922         }
3923         n_elif /= 4;
3924 
3925         if (has_else) {
3926             asdl_seq *suite_seq2;
3927 
3928             orelse = _Py_asdl_seq_new(1, c->c_arena);
3929             if (!orelse)
3930                 return NULL;
3931             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3932             if (!expression)
3933                 return NULL;
3934             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3935             if (!suite_seq)
3936                 return NULL;
3937             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3938             if (!suite_seq2)
3939                 return NULL;
3940             get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
3941 
3942             asdl_seq_SET(orelse, 0,
3943                          If(expression, suite_seq, suite_seq2,
3944                             LINENO(CHILD(n, NCH(n) - 7)),
3945                             CHILD(n, NCH(n) - 7)->n_col_offset,
3946                             end_lineno, end_col_offset, c->c_arena));
3947             /* the just-created orelse handled the last elif */
3948             n_elif--;
3949         }
3950 
3951         for (i = 0; i < n_elif; i++) {
3952             int off = 5 + (n_elif - i - 1) * 4;
3953             asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
3954             if (!newobj)
3955                 return NULL;
3956             expression = ast_for_expr(c, CHILD(n, off));
3957             if (!expression)
3958                 return NULL;
3959             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3960             if (!suite_seq)
3961                 return NULL;
3962 
3963             if (orelse != NULL) {
3964                 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
3965             } else {
3966                 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
3967             }
3968             asdl_seq_SET(newobj, 0,
3969                          If(expression, suite_seq, orelse,
3970                             LINENO(CHILD(n, off - 1)),
3971                             CHILD(n, off - 1)->n_col_offset,
3972                             end_lineno, end_col_offset, c->c_arena));
3973             orelse = newobj;
3974         }
3975         expression = ast_for_expr(c, CHILD(n, 1));
3976         if (!expression)
3977             return NULL;
3978         suite_seq = ast_for_suite(c, CHILD(n, 3));
3979         if (!suite_seq)
3980             return NULL;
3981         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
3982         return If(expression, suite_seq, orelse,
3983                   LINENO(n), n->n_col_offset,
3984                   end_lineno, end_col_offset, c->c_arena);
3985     }
3986 
3987     PyErr_Format(PyExc_SystemError,
3988                  "unexpected token in 'if' statement: %s", s);
3989     return NULL;
3990 }
3991 
3992 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)3993 ast_for_while_stmt(struct compiling *c, const node *n)
3994 {
3995     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3996     REQ(n, while_stmt);
3997     int end_lineno, end_col_offset;
3998 
3999     if (NCH(n) == 4) {
4000         expr_ty expression;
4001         asdl_seq *suite_seq;
4002 
4003         expression = ast_for_expr(c, CHILD(n, 1));
4004         if (!expression)
4005             return NULL;
4006         suite_seq = ast_for_suite(c, CHILD(n, 3));
4007         if (!suite_seq)
4008             return NULL;
4009         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4010         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4011                      end_lineno, end_col_offset, c->c_arena);
4012     }
4013     else if (NCH(n) == 7) {
4014         expr_ty expression;
4015         asdl_seq *seq1, *seq2;
4016 
4017         expression = ast_for_expr(c, CHILD(n, 1));
4018         if (!expression)
4019             return NULL;
4020         seq1 = ast_for_suite(c, CHILD(n, 3));
4021         if (!seq1)
4022             return NULL;
4023         seq2 = ast_for_suite(c, CHILD(n, 6));
4024         if (!seq2)
4025             return NULL;
4026         get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4027 
4028         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4029                      end_lineno, end_col_offset, c->c_arena);
4030     }
4031 
4032     PyErr_Format(PyExc_SystemError,
4033                  "wrong number of tokens for 'while' statement: %d",
4034                  NCH(n));
4035     return NULL;
4036 }
4037 
4038 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4039 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4040 {
4041     const node * const n = is_async ? CHILD(n0, 1) : n0;
4042     asdl_seq *_target, *seq = NULL, *suite_seq;
4043     expr_ty expression;
4044     expr_ty target, first;
4045     const node *node_target;
4046     int end_lineno, end_col_offset;
4047     int has_type_comment;
4048     string type_comment;
4049 
4050     if (is_async && c->c_feature_version < 5) {
4051         ast_error(c, n,
4052                   "Async for loops are only supported in Python 3.5 and greater");
4053         return NULL;
4054     }
4055 
4056     /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4057     REQ(n, for_stmt);
4058 
4059     has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4060 
4061     if (NCH(n) == 9 + has_type_comment) {
4062         seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4063         if (!seq)
4064             return NULL;
4065     }
4066 
4067     node_target = CHILD(n, 1);
4068     _target = ast_for_exprlist(c, node_target, Store);
4069     if (!_target)
4070         return NULL;
4071     /* Check the # of children rather than the length of _target, since
4072        for x, in ... has 1 element in _target, but still requires a Tuple. */
4073     first = (expr_ty)asdl_seq_GET(_target, 0);
4074     if (NCH(node_target) == 1)
4075         target = first;
4076     else
4077         target = Tuple(_target, Store, first->lineno, first->col_offset,
4078                        node_target->n_end_lineno, node_target->n_end_col_offset,
4079                        c->c_arena);
4080 
4081     expression = ast_for_testlist(c, CHILD(n, 3));
4082     if (!expression)
4083         return NULL;
4084     suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4085     if (!suite_seq)
4086         return NULL;
4087 
4088     if (seq != NULL) {
4089         get_last_end_pos(seq, &end_lineno, &end_col_offset);
4090     } else {
4091         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4092     }
4093 
4094     if (has_type_comment) {
4095         type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4096         if (!type_comment)
4097             return NULL;
4098     }
4099     else
4100         type_comment = NULL;
4101 
4102     if (is_async)
4103         return AsyncFor(target, expression, suite_seq, seq, type_comment,
4104                         LINENO(n0), n0->n_col_offset,
4105                         end_lineno, end_col_offset, c->c_arena);
4106     else
4107         return For(target, expression, suite_seq, seq, type_comment,
4108                    LINENO(n), n->n_col_offset,
4109                    end_lineno, end_col_offset, c->c_arena);
4110 }
4111 
4112 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4113 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4114 {
4115     /* except_clause: 'except' [test ['as' test]] */
4116     int end_lineno, end_col_offset;
4117     REQ(exc, except_clause);
4118     REQ(body, suite);
4119 
4120     if (NCH(exc) == 1) {
4121         asdl_seq *suite_seq = ast_for_suite(c, body);
4122         if (!suite_seq)
4123             return NULL;
4124         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4125 
4126         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4127                              exc->n_col_offset,
4128                              end_lineno, end_col_offset, c->c_arena);
4129     }
4130     else if (NCH(exc) == 2) {
4131         expr_ty expression;
4132         asdl_seq *suite_seq;
4133 
4134         expression = ast_for_expr(c, CHILD(exc, 1));
4135         if (!expression)
4136             return NULL;
4137         suite_seq = ast_for_suite(c, body);
4138         if (!suite_seq)
4139             return NULL;
4140         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4141 
4142         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4143                              exc->n_col_offset,
4144                              end_lineno, end_col_offset, c->c_arena);
4145     }
4146     else if (NCH(exc) == 4) {
4147         asdl_seq *suite_seq;
4148         expr_ty expression;
4149         identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4150         if (!e)
4151             return NULL;
4152         if (forbidden_name(c, e, CHILD(exc, 3), 0))
4153             return NULL;
4154         expression = ast_for_expr(c, CHILD(exc, 1));
4155         if (!expression)
4156             return NULL;
4157         suite_seq = ast_for_suite(c, body);
4158         if (!suite_seq)
4159             return NULL;
4160         get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4161 
4162         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4163                              exc->n_col_offset,
4164                              end_lineno, end_col_offset, c->c_arena);
4165     }
4166 
4167     PyErr_Format(PyExc_SystemError,
4168                  "wrong number of children for 'except' clause: %d",
4169                  NCH(exc));
4170     return NULL;
4171 }
4172 
4173 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4174 ast_for_try_stmt(struct compiling *c, const node *n)
4175 {
4176     const int nch = NCH(n);
4177     int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4178     asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4179     excepthandler_ty last_handler;
4180 
4181     REQ(n, try_stmt);
4182 
4183     body = ast_for_suite(c, CHILD(n, 2));
4184     if (body == NULL)
4185         return NULL;
4186 
4187     if (TYPE(CHILD(n, nch - 3)) == NAME) {
4188         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4189             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4190                 /* we can assume it's an "else",
4191                    because nch >= 9 for try-else-finally and
4192                    it would otherwise have a type of except_clause */
4193                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4194                 if (orelse == NULL)
4195                     return NULL;
4196                 n_except--;
4197             }
4198 
4199             finally = ast_for_suite(c, CHILD(n, nch - 1));
4200             if (finally == NULL)
4201                 return NULL;
4202             n_except--;
4203         }
4204         else {
4205             /* we can assume it's an "else",
4206                otherwise it would have a type of except_clause */
4207             orelse = ast_for_suite(c, CHILD(n, nch - 1));
4208             if (orelse == NULL)
4209                 return NULL;
4210             n_except--;
4211         }
4212     }
4213     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4214         ast_error(c, n, "malformed 'try' statement");
4215         return NULL;
4216     }
4217 
4218     if (n_except > 0) {
4219         int i;
4220         /* process except statements to create a try ... except */
4221         handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4222         if (handlers == NULL)
4223             return NULL;
4224 
4225         for (i = 0; i < n_except; i++) {
4226             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4227                                                        CHILD(n, 5 + i * 3));
4228             if (!e)
4229                 return NULL;
4230             asdl_seq_SET(handlers, i, e);
4231         }
4232     }
4233 
4234     assert(finally != NULL || asdl_seq_LEN(handlers));
4235         if (finally != NULL) {
4236         // finally is always last
4237         get_last_end_pos(finally, &end_lineno, &end_col_offset);
4238     } else if (orelse != NULL) {
4239         // otherwise else is last
4240         get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4241     } else {
4242         // inline the get_last_end_pos logic due to layout mismatch
4243         last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4244         end_lineno = last_handler->end_lineno;
4245         end_col_offset = last_handler->end_col_offset;
4246     }
4247     return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4248                end_lineno, end_col_offset, c->c_arena);
4249 }
4250 
4251 /* with_item: test ['as' expr] */
4252 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4253 ast_for_with_item(struct compiling *c, const node *n)
4254 {
4255     expr_ty context_expr, optional_vars = NULL;
4256 
4257     REQ(n, with_item);
4258     context_expr = ast_for_expr(c, CHILD(n, 0));
4259     if (!context_expr)
4260         return NULL;
4261     if (NCH(n) == 3) {
4262         optional_vars = ast_for_expr(c, CHILD(n, 2));
4263 
4264         if (!optional_vars) {
4265             return NULL;
4266         }
4267         if (!set_context(c, optional_vars, Store, n)) {
4268             return NULL;
4269         }
4270     }
4271 
4272     return withitem(context_expr, optional_vars, c->c_arena);
4273 }
4274 
4275 /* with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite */
4276 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4277 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4278 {
4279     const node * const n = is_async ? CHILD(n0, 1) : n0;
4280     int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4281     asdl_seq *items, *body;
4282     string type_comment;
4283 
4284     if (is_async && c->c_feature_version < 5) {
4285         ast_error(c, n,
4286                   "Async with statements are only supported in Python 3.5 and greater");
4287         return NULL;
4288     }
4289 
4290     REQ(n, with_stmt);
4291 
4292     has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4293     nch_minus_type = NCH(n) - has_type_comment;
4294 
4295     n_items = (nch_minus_type - 2) / 2;
4296     items = _Py_asdl_seq_new(n_items, c->c_arena);
4297     if (!items)
4298         return NULL;
4299     for (i = 1; i < nch_minus_type - 2; i += 2) {
4300         withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4301         if (!item)
4302             return NULL;
4303         asdl_seq_SET(items, (i - 1) / 2, item);
4304     }
4305 
4306     body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4307     if (!body)
4308         return NULL;
4309     get_last_end_pos(body, &end_lineno, &end_col_offset);
4310 
4311     if (has_type_comment) {
4312         type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4313         if (!type_comment)
4314             return NULL;
4315     }
4316     else
4317         type_comment = NULL;
4318 
4319     if (is_async)
4320         return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4321                          end_lineno, end_col_offset, c->c_arena);
4322     else
4323         return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4324                     end_lineno, end_col_offset, c->c_arena);
4325 }
4326 
4327 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4328 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4329 {
4330     /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4331     PyObject *classname;
4332     asdl_seq *s;
4333     expr_ty call;
4334     int end_lineno, end_col_offset;
4335 
4336     REQ(n, classdef);
4337 
4338     if (NCH(n) == 4) { /* class NAME ':' suite */
4339         s = ast_for_suite(c, CHILD(n, 3));
4340         if (!s)
4341             return NULL;
4342         get_last_end_pos(s, &end_lineno, &end_col_offset);
4343 
4344         classname = NEW_IDENTIFIER(CHILD(n, 1));
4345         if (!classname)
4346             return NULL;
4347         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4348             return NULL;
4349         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4350                         LINENO(n), n->n_col_offset,
4351                         end_lineno, end_col_offset, c->c_arena);
4352     }
4353 
4354     if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4355         s = ast_for_suite(c, CHILD(n, 5));
4356         if (!s)
4357             return NULL;
4358         get_last_end_pos(s, &end_lineno, &end_col_offset);
4359 
4360         classname = NEW_IDENTIFIER(CHILD(n, 1));
4361         if (!classname)
4362             return NULL;
4363         if (forbidden_name(c, classname, CHILD(n, 3), 0))
4364             return NULL;
4365         return ClassDef(classname, NULL, NULL, s, decorator_seq,
4366                         LINENO(n), n->n_col_offset,
4367                         end_lineno, end_col_offset, c->c_arena);
4368     }
4369 
4370     /* class NAME '(' arglist ')' ':' suite */
4371     /* build up a fake Call node so we can extract its pieces */
4372     {
4373         PyObject *dummy_name;
4374         expr_ty dummy;
4375         dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4376         if (!dummy_name)
4377             return NULL;
4378         dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4379                      CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4380                      c->c_arena);
4381         call = ast_for_call(c, CHILD(n, 3), dummy,
4382                             CHILD(n, 1), NULL, CHILD(n, 4));
4383         if (!call)
4384             return NULL;
4385     }
4386     s = ast_for_suite(c, CHILD(n, 6));
4387     if (!s)
4388         return NULL;
4389     get_last_end_pos(s, &end_lineno, &end_col_offset);
4390 
4391     classname = NEW_IDENTIFIER(CHILD(n, 1));
4392     if (!classname)
4393         return NULL;
4394     if (forbidden_name(c, classname, CHILD(n, 1), 0))
4395         return NULL;
4396 
4397     return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4398                     decorator_seq, LINENO(n), n->n_col_offset,
4399                     end_lineno, end_col_offset, c->c_arena);
4400 }
4401 
4402 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4403 ast_for_stmt(struct compiling *c, const node *n)
4404 {
4405     if (TYPE(n) == stmt) {
4406         assert(NCH(n) == 1);
4407         n = CHILD(n, 0);
4408     }
4409     if (TYPE(n) == simple_stmt) {
4410         assert(num_stmts(n) == 1);
4411         n = CHILD(n, 0);
4412     }
4413     if (TYPE(n) == small_stmt) {
4414         n = CHILD(n, 0);
4415         /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4416                   | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4417         */
4418         switch (TYPE(n)) {
4419             case expr_stmt:
4420                 return ast_for_expr_stmt(c, n);
4421             case del_stmt:
4422                 return ast_for_del_stmt(c, n);
4423             case pass_stmt:
4424                 return Pass(LINENO(n), n->n_col_offset,
4425                             n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4426             case flow_stmt:
4427                 return ast_for_flow_stmt(c, n);
4428             case import_stmt:
4429                 return ast_for_import_stmt(c, n);
4430             case global_stmt:
4431                 return ast_for_global_stmt(c, n);
4432             case nonlocal_stmt:
4433                 return ast_for_nonlocal_stmt(c, n);
4434             case assert_stmt:
4435                 return ast_for_assert_stmt(c, n);
4436             default:
4437                 PyErr_Format(PyExc_SystemError,
4438                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4439                              TYPE(n), NCH(n));
4440                 return NULL;
4441         }
4442     }
4443     else {
4444         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4445                         | funcdef | classdef | decorated | async_stmt
4446         */
4447         node *ch = CHILD(n, 0);
4448         REQ(n, compound_stmt);
4449         switch (TYPE(ch)) {
4450             case if_stmt:
4451                 return ast_for_if_stmt(c, ch);
4452             case while_stmt:
4453                 return ast_for_while_stmt(c, ch);
4454             case for_stmt:
4455                 return ast_for_for_stmt(c, ch, 0);
4456             case try_stmt:
4457                 return ast_for_try_stmt(c, ch);
4458             case with_stmt:
4459                 return ast_for_with_stmt(c, ch, 0);
4460             case funcdef:
4461                 return ast_for_funcdef(c, ch, NULL);
4462             case classdef:
4463                 return ast_for_classdef(c, ch, NULL);
4464             case decorated:
4465                 return ast_for_decorated(c, ch);
4466             case async_stmt:
4467                 return ast_for_async_stmt(c, ch);
4468             default:
4469                 PyErr_Format(PyExc_SystemError,
4470                              "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4471                              TYPE(n), NCH(n));
4472                 return NULL;
4473         }
4474     }
4475 }
4476 
4477 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4478 parsenumber_raw(struct compiling *c, const char *s)
4479 {
4480     const char *end;
4481     long x;
4482     double dx;
4483     Py_complex compl;
4484     int imflag;
4485 
4486     assert(s != NULL);
4487     errno = 0;
4488     end = s + strlen(s) - 1;
4489     imflag = *end == 'j' || *end == 'J';
4490     if (s[0] == '0') {
4491         x = (long) PyOS_strtoul(s, (char **)&end, 0);
4492         if (x < 0 && errno == 0) {
4493             return PyLong_FromString(s, (char **)0, 0);
4494         }
4495     }
4496     else
4497         x = PyOS_strtol(s, (char **)&end, 0);
4498     if (*end == '\0') {
4499         if (errno != 0)
4500             return PyLong_FromString(s, (char **)0, 0);
4501         return PyLong_FromLong(x);
4502     }
4503     /* XXX Huge floats may silently fail */
4504     if (imflag) {
4505         compl.real = 0.;
4506         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4507         if (compl.imag == -1.0 && PyErr_Occurred())
4508             return NULL;
4509         return PyComplex_FromCComplex(compl);
4510     }
4511     else
4512     {
4513         dx = PyOS_string_to_double(s, NULL, NULL);
4514         if (dx == -1.0 && PyErr_Occurred())
4515             return NULL;
4516         return PyFloat_FromDouble(dx);
4517     }
4518 }
4519 
4520 static PyObject *
parsenumber(struct compiling * c,const char * s)4521 parsenumber(struct compiling *c, const char *s)
4522 {
4523     char *dup, *end;
4524     PyObject *res = NULL;
4525 
4526     assert(s != NULL);
4527 
4528     if (strchr(s, '_') == NULL) {
4529         return parsenumber_raw(c, s);
4530     }
4531     /* Create a duplicate without underscores. */
4532     dup = PyMem_Malloc(strlen(s) + 1);
4533     if (dup == NULL) {
4534         return PyErr_NoMemory();
4535     }
4536     end = dup;
4537     for (; *s; s++) {
4538         if (*s != '_') {
4539             *end++ = *s;
4540         }
4541     }
4542     *end = '\0';
4543     res = parsenumber_raw(c, dup);
4544     PyMem_Free(dup);
4545     return res;
4546 }
4547 
4548 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4549 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4550 {
4551     const char *s, *t;
4552     t = s = *sPtr;
4553     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4554     while (s < end && (*s & 0x80)) s++;
4555     *sPtr = s;
4556     return PyUnicode_DecodeUTF8(t, s - t, NULL);
4557 }
4558 
4559 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4560 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4561                              unsigned char first_invalid_escape_char)
4562 {
4563     PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4564                                          first_invalid_escape_char);
4565     if (msg == NULL) {
4566         return -1;
4567     }
4568     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4569                                    c->c_filename, LINENO(n),
4570                                    NULL, NULL) < 0)
4571     {
4572         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4573             /* Replace the DeprecationWarning exception with a SyntaxError
4574                to get a more accurate error report */
4575             PyErr_Clear();
4576             ast_error(c, n, "%U", msg);
4577         }
4578         Py_DECREF(msg);
4579         return -1;
4580     }
4581     Py_DECREF(msg);
4582     return 0;
4583 }
4584 
4585 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4586 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4587                             size_t len)
4588 {
4589     PyObject *v, *u;
4590     char *buf;
4591     char *p;
4592     const char *end;
4593 
4594     /* check for integer overflow */
4595     if (len > SIZE_MAX / 6)
4596         return NULL;
4597     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4598        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4599     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4600     if (u == NULL)
4601         return NULL;
4602     p = buf = PyBytes_AsString(u);
4603     end = s + len;
4604     while (s < end) {
4605         if (*s == '\\') {
4606             *p++ = *s++;
4607             if (s >= end || *s & 0x80) {
4608                 strcpy(p, "u005c");
4609                 p += 5;
4610                 if (s >= end)
4611                     break;
4612             }
4613         }
4614         if (*s & 0x80) { /* XXX inefficient */
4615             PyObject *w;
4616             int kind;
4617             const void *data;
4618             Py_ssize_t len, i;
4619             w = decode_utf8(c, &s, end);
4620             if (w == NULL) {
4621                 Py_DECREF(u);
4622                 return NULL;
4623             }
4624             kind = PyUnicode_KIND(w);
4625             data = PyUnicode_DATA(w);
4626             len = PyUnicode_GET_LENGTH(w);
4627             for (i = 0; i < len; i++) {
4628                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4629                 sprintf(p, "\\U%08x", chr);
4630                 p += 10;
4631             }
4632             /* Should be impossible to overflow */
4633             assert(p - buf <= PyBytes_GET_SIZE(u));
4634             Py_DECREF(w);
4635         } else {
4636             *p++ = *s++;
4637         }
4638     }
4639     len = p - buf;
4640     s = buf;
4641 
4642     const char *first_invalid_escape;
4643     v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
4644 
4645     if (v != NULL && first_invalid_escape != NULL) {
4646         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4647             /* We have not decref u before because first_invalid_escape points
4648                inside u. */
4649             Py_XDECREF(u);
4650             Py_DECREF(v);
4651             return NULL;
4652         }
4653     }
4654     Py_XDECREF(u);
4655     return v;
4656 }
4657 
4658 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4659 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4660                           size_t len)
4661 {
4662     const char *first_invalid_escape;
4663     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
4664                                              &first_invalid_escape);
4665     if (result == NULL)
4666         return NULL;
4667 
4668     if (first_invalid_escape != NULL) {
4669         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4670             Py_DECREF(result);
4671             return NULL;
4672         }
4673     }
4674     return result;
4675 }
4676 
4677 /* Shift locations for the given node and all its children by adding `lineno`
4678    and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4679 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4680 {
4681     n->n_col_offset = n->n_col_offset + col_offset;
4682     n->n_end_col_offset = n->n_end_col_offset + col_offset;
4683     for (int i = 0; i < NCH(n); ++i) {
4684         if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4685             /* Shifting column offsets unnecessary if there's been newlines. */
4686             col_offset = 0;
4687         }
4688         fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4689     }
4690     n->n_lineno = n->n_lineno + lineno;
4691     n->n_end_lineno = n->n_end_lineno + lineno;
4692 }
4693 
4694 /* Fix locations for the given node and its children.
4695 
4696    `parent` is the enclosing node.
4697    `n` is the node which locations are going to be fixed relative to parent.
4698    `expr_str` is the child node's string representation, including braces.
4699 */
4700 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4701 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4702 {
4703     char *substr = NULL;
4704     char *start;
4705     int lines = LINENO(parent) - 1;
4706     int cols = parent->n_col_offset;
4707     /* Find the full fstring to fix location information in `n`. */
4708     while (parent && parent->n_type != STRING)
4709         parent = parent->n_child;
4710     if (parent && parent->n_str) {
4711         substr = strstr(parent->n_str, expr_str);
4712         if (substr) {
4713             start = substr;
4714             while (start > parent->n_str) {
4715                 if (start[0] == '\n')
4716                     break;
4717                 start--;
4718             }
4719             cols += (int)(substr - start);
4720             /* adjust the start based on the number of newlines encountered
4721                before the f-string expression */
4722             for (char* p = parent->n_str; p < substr; p++) {
4723                 if (*p == '\n') {
4724                     lines++;
4725                 }
4726             }
4727         }
4728     }
4729     fstring_shift_node_locations(n, lines, cols);
4730 }
4731 
4732 /* Compile this expression in to an expr_ty.  Add parens around the
4733    expression, in order to allow leading spaces in the expression. */
4734 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4735 fstring_compile_expr(const char *expr_start, const char *expr_end,
4736                      struct compiling *c, const node *n)
4737 
4738 {
4739     node *mod_n;
4740     mod_ty mod;
4741     char *str;
4742     Py_ssize_t len;
4743     const char *s;
4744 
4745     assert(expr_end >= expr_start);
4746     assert(*(expr_start-1) == '{');
4747     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4748            *expr_end == '=');
4749 
4750     /* If the substring is all whitespace, it's an error.  We need to catch this
4751        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4752        because turning the expression '' in to '()' would go from being invalid
4753        to valid. */
4754     for (s = expr_start; s != expr_end; s++) {
4755         char c = *s;
4756         /* The Python parser ignores only the following whitespace
4757            characters (\r already is converted to \n). */
4758         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4759             break;
4760         }
4761     }
4762     if (s == expr_end) {
4763         ast_error(c, n, "f-string: empty expression not allowed");
4764         return NULL;
4765     }
4766 
4767     len = expr_end - expr_start;
4768     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4769     str = PyMem_Malloc(len + 3);
4770     if (str == NULL) {
4771         PyErr_NoMemory();
4772         return NULL;
4773     }
4774 
4775     str[0] = '(';
4776     memcpy(str+1, expr_start, len);
4777     str[len+1] = ')';
4778     str[len+2] = 0;
4779 
4780     PyCompilerFlags cf = _PyCompilerFlags_INIT;
4781     cf.cf_flags = PyCF_ONLY_AST;
4782     mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4783                                                     Py_eval_input, 0);
4784     if (!mod_n) {
4785         PyMem_Free(str);
4786         return NULL;
4787     }
4788     /* Reuse str to find the correct column offset. */
4789     str[0] = '{';
4790     str[len+1] = '}';
4791     fstring_fix_node_location(n, mod_n, str);
4792     mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4793     PyMem_Free(str);
4794     PyNode_Free(mod_n);
4795     if (!mod)
4796         return NULL;
4797     return mod->v.Expression.body;
4798 }
4799 
/* Scan the literal (non-expression) text of an f-string starting at *str
   and stopping no later than `end`.

   `raw` disables backslash handling: the text is then decoded with
   PyUnicode_DecodeUTF8Stateful() instead of decode_unicode_with_escapes().
   `*literal` must be NULL on entry; if any literal text was scanned it is
   set to the decoded string object.
   `recurse_lvl` is 0 at the top level; doubled braces and a stray '}' are
   only checked for there.
   `*str` is advanced to where the caller should resume.

   Return -1 on error.

   Return 0 if we reached the end of the literal.

   Return 1 if we haven't reached the end of the literal, but we want
   the caller to process the literal up to this point. Used for
   doubled braces.
*/
static int
fstring_find_literal(const char **str, const char *end, int raw,
                     PyObject **literal, int recurse_lvl,
                     struct compiling *c, const node *n)
{
    /* Get any literal string. It ends when we hit an un-doubled left
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */

    const char *s = *str;
    const char *literal_start = s;
    int result = 0;

    assert(*literal == NULL);
    while (s < end) {
        char ch = *s++;
        if (!raw && ch == '\\' && s < end) {
            /* Look at the character following the backslash. */
            ch = *s++;
            if (ch == 'N') {
                if (s < end && *s++ == '{') {
                    /* Skip the whole \N{...} name so that its braces are
                       not taken for expression delimiters. */
                    while (s < end && *s++ != '}') {
                    }
                    continue;
                }
                /* "\N" without a following '{': stop scanning here. */
                break;
            }
            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
                return -1;
            }
        }
        if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
                if (s < end && *s == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
                    *str = s + 1;
                    result = 1;
                    goto done;
                }

                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
                    *str = s - 1;
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            s--;
            break;
        }
    }
    *str = s;
    assert(s <= end);
    assert(s == end || *s == '{' || *s == '}');
done:
    /* In the doubled-brace case `s` is just past the first brace, so the
       literal decoded below includes exactly one of the two braces. */
    if (literal_start != s) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                    s - literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                   s - literal_start);
        if (!*literal)
            return -1;
    }
    return result;
}
4884 
/* Forward declaration because parsing is recursive: fstring_parse() is
   declared here so that code placed ahead of its definition can call it. */
static expr_ty
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
4889 
4890 /* Parse the f-string at *str, ending at end.  We know *str starts an
4891    expression (so it must be a '{'). Returns the FormattedValue node, which
4892    includes the expression, conversion character, format_spec expression, and
4893    optionally the text of the expression (if = is used).
4894 
4895    Note that I don't do a perfect job here: I don't make sure that a
4896    closing brace doesn't match an opening paren, for example. It
4897    doesn't need to error on all invalid expressions, just correctly
4898    find the end of all valid ones. Any errors inside the expression
4899    will be caught when we parse it later.
4900 
4901    *expression is set to the expression.  For an '=' "debug" expression,
4902    *expr_text is set to the debug text (the original text of the expression,
4903    including the '=' and any whitespace around it, as a string object).  If
4904    not a debug expression, *expr_text set to NULL. */
4905 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)4906 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
4907                   PyObject **expr_text, expr_ty *expression,
4908                   struct compiling *c, const node *n)
4909 {
4910     /* Return -1 on error, else 0. */
4911 
4912     const char *expr_start;
4913     const char *expr_end;
4914     expr_ty simple_expression;
4915     expr_ty format_spec = NULL; /* Optional format specifier. */
4916     int conversion = -1; /* The conversion char.  Use default if not
4917                             specified, or !r if using = and no format
4918                             spec. */
4919 
4920     /* 0 if we're not in a string, else the quote char we're trying to
4921        match (single or double quote). */
4922     char quote_char = 0;
4923 
4924     /* If we're inside a string, 1=normal, 3=triple-quoted. */
4925     int string_type = 0;
4926 
4927     /* Keep track of nesting level for braces/parens/brackets in
4928        expressions. */
4929     Py_ssize_t nested_depth = 0;
4930     char parenstack[MAXLEVEL];
4931 
4932     *expr_text = NULL;
4933 
4934     /* Can only nest one level deep. */
4935     if (recurse_lvl >= 2) {
4936         ast_error(c, n, "f-string: expressions nested too deeply");
4937         goto error;
4938     }
4939 
4940     /* The first char must be a left brace, or we wouldn't have gotten
4941        here. Skip over it. */
4942     assert(**str == '{');
4943     *str += 1;
4944 
4945     expr_start = *str;
4946     for (; *str < end; (*str)++) {
4947         char ch;
4948 
4949         /* Loop invariants. */
4950         assert(nested_depth >= 0);
4951         assert(*str >= expr_start && *str < end);
4952         if (quote_char)
4953             assert(string_type == 1 || string_type == 3);
4954         else
4955             assert(string_type == 0);
4956 
4957         ch = **str;
4958         /* Nowhere inside an expression is a backslash allowed. */
4959         if (ch == '\\') {
4960             /* Error: can't include a backslash character, inside
4961                parens or strings or not. */
4962             ast_error(c, n,
4963                       "f-string expression part "
4964                       "cannot include a backslash");
4965             goto error;
4966         }
4967         if (quote_char) {
4968             /* We're inside a string. See if we're at the end. */
4969             /* This code needs to implement the same non-error logic
4970                as tok_get from tokenizer.c, at the letter_quote
4971                label. To actually share that code would be a
4972                nightmare. But, it's unlikely to change and is small,
4973                so duplicate it here. Note we don't need to catch all
4974                of the errors, since they'll be caught when parsing the
4975                expression. We just need to match the non-error
4976                cases. Thus we can ignore \n in single-quoted strings,
4977                for example. Or non-terminated strings. */
4978             if (ch == quote_char) {
4979                 /* Does this match the string_type (single or triple
4980                    quoted)? */
4981                 if (string_type == 3) {
4982                     if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4983                         /* We're at the end of a triple quoted string. */
4984                         *str += 2;
4985                         string_type = 0;
4986                         quote_char = 0;
4987                         continue;
4988                     }
4989                 } else {
4990                     /* We're at the end of a normal string. */
4991                     quote_char = 0;
4992                     string_type = 0;
4993                     continue;
4994                 }
4995             }
4996         } else if (ch == '\'' || ch == '"') {
4997             /* Is this a triple quoted string? */
4998             if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4999                 string_type = 3;
5000                 *str += 2;
5001             } else {
5002                 /* Start of a normal string. */
5003                 string_type = 1;
5004             }
5005             /* Start looking for the end of the string. */
5006             quote_char = ch;
5007         } else if (ch == '[' || ch == '{' || ch == '(') {
5008             if (nested_depth >= MAXLEVEL) {
5009                 ast_error(c, n, "f-string: too many nested parenthesis");
5010                 goto error;
5011             }
5012             parenstack[nested_depth] = ch;
5013             nested_depth++;
5014         } else if (ch == '#') {
5015             /* Error: can't include a comment character, inside parens
5016                or not. */
5017             ast_error(c, n, "f-string expression part cannot include '#'");
5018             goto error;
5019         } else if (nested_depth == 0 &&
5020                    (ch == '!' || ch == ':' || ch == '}' ||
5021                     ch == '=' || ch == '>' || ch == '<')) {
5022             /* See if there's a next character. */
5023             if (*str+1 < end) {
5024                 char next = *(*str+1);
5025 
5026                 /* For "!=". since '=' is not an allowed conversion character,
5027                    nothing is lost in this test. */
5028                 if ((ch == '!' && next == '=') ||   /* != */
5029                     (ch == '=' && next == '=') ||   /* == */
5030                     (ch == '<' && next == '=') ||   /* <= */
5031                     (ch == '>' && next == '=')      /* >= */
5032                     ) {
5033                     *str += 1;
5034                     continue;
5035                 }
5036                 /* Don't get out of the loop for these, if they're single
5037                    chars (not part of 2-char tokens). If by themselves, they
5038                    don't end an expression (unlike say '!'). */
5039                 if (ch == '>' || ch == '<') {
5040                     continue;
5041                 }
5042             }
5043 
5044             /* Normal way out of this loop. */
5045             break;
5046         } else if (ch == ']' || ch == '}' || ch == ')') {
5047             if (!nested_depth) {
5048                 ast_error(c, n, "f-string: unmatched '%c'", ch);
5049                 goto error;
5050             }
5051             nested_depth--;
5052             int opening = parenstack[nested_depth];
5053             if (!((opening == '(' && ch == ')') ||
5054                   (opening == '[' && ch == ']') ||
5055                   (opening == '{' && ch == '}')))
5056             {
5057                 ast_error(c, n,
5058                           "f-string: closing parenthesis '%c' "
5059                           "does not match opening parenthesis '%c'",
5060                           ch, opening);
5061                 goto error;
5062             }
5063         } else {
5064             /* Just consume this char and loop around. */
5065         }
5066     }
5067     expr_end = *str;
5068     /* If we leave this loop in a string or with mismatched parens, we
5069        don't care. We'll get a syntax error when compiling the
5070        expression. But, we can produce a better error message, so
5071        let's just do that.*/
5072     if (quote_char) {
5073         ast_error(c, n, "f-string: unterminated string");
5074         goto error;
5075     }
5076     if (nested_depth) {
5077         int opening = parenstack[nested_depth - 1];
5078         ast_error(c, n, "f-string: unmatched '%c'", opening);
5079         goto error;
5080     }
5081 
5082     if (*str >= end)
5083         goto unexpected_end_of_string;
5084 
5085     /* Compile the expression as soon as possible, so we show errors
5086        related to the expression before errors related to the
5087        conversion or format_spec. */
5088     simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
5089     if (!simple_expression)
5090         goto error;
5091 
5092     /* Check for =, which puts the text value of the expression in
5093        expr_text. */
5094     if (**str == '=') {
5095         if (c->c_feature_version < 8) {
5096             ast_error(c, n,
5097                       "f-string: self documenting expressions are "
5098                       "only supported in Python 3.8 and greater");
5099             goto error;
5100         }
5101         *str += 1;
5102 
5103         /* Skip over ASCII whitespace.  No need to test for end of string
5104            here, since we know there's at least a trailing quote somewhere
5105            ahead. */
5106         while (Py_ISSPACE(**str)) {
5107             *str += 1;
5108         }
5109 
5110         /* Set *expr_text to the text of the expression. */
5111         *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
5112         if (!*expr_text) {
5113             goto error;
5114         }
5115     }
5116 
5117     /* Check for a conversion char, if present. */
5118     if (**str == '!') {
5119         *str += 1;
5120         if (*str >= end)
5121             goto unexpected_end_of_string;
5122 
5123         conversion = **str;
5124         *str += 1;
5125 
5126         /* Validate the conversion. */
5127         if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
5128             ast_error(c, n,
5129                       "f-string: invalid conversion character: "
5130                       "expected 's', 'r', or 'a'");
5131             goto error;
5132         }
5133 
5134     }
5135 
5136     /* Check for the format spec, if present. */
5137     if (*str >= end)
5138         goto unexpected_end_of_string;
5139     if (**str == ':') {
5140         *str += 1;
5141         if (*str >= end)
5142             goto unexpected_end_of_string;
5143 
5144         /* Parse the format spec. */
5145         format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
5146         if (!format_spec)
5147             goto error;
5148     }
5149 
5150     if (*str >= end || **str != '}')
5151         goto unexpected_end_of_string;
5152 
5153     /* We're at a right brace. Consume it. */
5154     assert(*str < end);
5155     assert(**str == '}');
5156     *str += 1;
5157 
5158     /* If we're in = mode (detected by non-NULL expr_text), and have no format
5159        spec and no explicit conversion, set the conversion to 'r'. */
5160     if (*expr_text && format_spec == NULL && conversion == -1) {
5161         conversion = 'r';
5162     }
5163 
5164     /* And now create the FormattedValue node that represents this
5165        entire expression with the conversion and format spec. */
5166     *expression = FormattedValue(simple_expression, conversion,
5167                                  format_spec, LINENO(n),
5168                                  n->n_col_offset, n->n_end_lineno,
5169                                  n->n_end_col_offset, c->c_arena);
5170     if (!*expression)
5171         goto error;
5172 
5173     return 0;
5174 
5175 unexpected_end_of_string:
5176     ast_error(c, n, "f-string: expecting '}'");
5177     /* Falls through to error. */
5178 
5179 error:
5180     Py_XDECREF(*expr_text);
5181     return -1;
5182 
5183 }
5184 
5185 /* Return -1 on error.
5186 
5187    Return 0 if we have a literal (possible zero length) and an
5188    expression (zero length if at the end of the string.
5189 
5190    Return 1 if we have a literal, but no expression, and we want the
5191    caller to call us again. This is used to deal with doubled
5192    braces.
5193 
5194    When called multiple times on the string 'a{{b{0}c', this function
5195    will return:
5196 
5197    1. the literal 'a{' with no expression, and a return value
5198       of 1. Despite the fact that there's no expression, the return
5199       value of 1 means we're not finished yet.
5200 
5201    2. the literal 'b' and the expression '0', with a return value of
5202       0. The fact that there's an expression means we're not finished.
5203 
5204    3. literal 'c' with no expression and a return value of 0. The
5205       combination of the return value of 0 with no expression means
5206       we're finished.
5207 */
5208 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5209 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5210                               int recurse_lvl, PyObject **literal,
5211                               PyObject **expr_text, expr_ty *expression,
5212                               struct compiling *c, const node *n)
5213 {
5214     int result;
5215 
5216     assert(*literal == NULL && *expression == NULL);
5217 
5218     /* Get any literal string. */
5219     result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5220     if (result < 0)
5221         goto error;
5222 
5223     assert(result == 0 || result == 1);
5224 
5225     if (result == 1)
5226         /* We have a literal, but don't look at the expression. */
5227         return 1;
5228 
5229     if (*str >= end || **str == '}')
5230         /* We're at the end of the string or the end of a nested
5231            f-string: no expression. The top-level error case where we
5232            expect to be at the end of the string but we're at a '}' is
5233            handled later. */
5234         return 0;
5235 
5236     /* We must now be the start of an expression, on a '{'. */
5237     assert(**str == '{');
5238 
5239     if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5240                           expression, c, n) < 0)
5241         goto error;
5242 
5243     return 0;
5244 
5245 error:
5246     Py_CLEAR(*literal);
5247     return -1;
5248 }
5249 
/* Number of expr_ty slots stored inline in an ExprList before it has to
   switch to dynamically allocated storage. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. Set to -1 by
                              ExprList_Dealloc(). */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5268 
5269 #ifdef NDEBUG
5270 #define ExprList_check_invariants(l)
5271 #else
5272 static void
ExprList_check_invariants(ExprList * l)5273 ExprList_check_invariants(ExprList *l)
5274 {
5275     /* Check our invariants. Make sure this object is "live", and
5276        hasn't been deallocated. */
5277     assert(l->size >= 0);
5278     assert(l->p != NULL);
5279     if (l->size <= EXPRLIST_N_CACHED)
5280         assert(l->data == l->p);
5281 }
5282 #endif
5283 
5284 static void
ExprList_Init(ExprList * l)5285 ExprList_Init(ExprList *l)
5286 {
5287     l->allocated = EXPRLIST_N_CACHED;
5288     l->size = 0;
5289 
5290     /* Until we start allocating dynamically, p points to data. */
5291     l->p = l->data;
5292 
5293     ExprList_check_invariants(l);
5294 }
5295 
5296 static int
ExprList_Append(ExprList * l,expr_ty exp)5297 ExprList_Append(ExprList *l, expr_ty exp)
5298 {
5299     ExprList_check_invariants(l);
5300     if (l->size >= l->allocated) {
5301         /* We need to alloc (or realloc) the memory. */
5302         Py_ssize_t new_size = l->allocated * 2;
5303 
5304         /* See if we've ever allocated anything dynamically. */
5305         if (l->p == l->data) {
5306             Py_ssize_t i;
5307             /* We're still using the cached data. Switch to
5308                alloc-ing. */
5309             l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5310             if (!l->p)
5311                 return -1;
5312             /* Copy the cached data into the new buffer. */
5313             for (i = 0; i < l->size; i++)
5314                 l->p[i] = l->data[i];
5315         } else {
5316             /* Just realloc. */
5317             expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5318             if (!tmp) {
5319                 PyMem_Free(l->p);
5320                 l->p = NULL;
5321                 return -1;
5322             }
5323             l->p = tmp;
5324         }
5325 
5326         l->allocated = new_size;
5327         assert(l->allocated == 2 * l->size);
5328     }
5329 
5330     l->p[l->size++] = exp;
5331 
5332     ExprList_check_invariants(l);
5333     return 0;
5334 }
5335 
5336 static void
ExprList_Dealloc(ExprList * l)5337 ExprList_Dealloc(ExprList *l)
5338 {
5339     ExprList_check_invariants(l);
5340 
5341     /* If there's been an error, or we've never dynamically allocated,
5342        do nothing. */
5343     if (!l->p || l->p == l->data) {
5344         /* Do nothing. */
5345     } else {
5346         /* We have dynamically allocated. Free the memory. */
5347         PyMem_Free(l->p);
5348     }
5349     l->p = NULL;
5350     l->size = -1;
5351 }
5352 
5353 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5354 ExprList_Finish(ExprList *l, PyArena *arena)
5355 {
5356     asdl_seq *seq;
5357 
5358     ExprList_check_invariants(l);
5359 
5360     /* Allocate the asdl_seq and copy the expressions in to it. */
5361     seq = _Py_asdl_seq_new(l->size, arena);
5362     if (seq) {
5363         Py_ssize_t i;
5364         for (i = 0; i < l->size; i++)
5365             asdl_seq_SET(seq, i, l->p[i]);
5366     }
5367     ExprList_Dealloc(l);
5368     return seq;
5369 }
5370 
/* The FstringParser is designed to add a mix of strings and
   f-strings, and concat them together as needed. Ultimately, it
   generates an expr_ty (see FstringParser_Finish()). */
typedef struct {
    /* Literal text accumulated so far that has not yet been turned into
       a Constant node; NULL when there is none. */
    PyObject *last_str;
    /* Nodes collected so far, in order. */
    ExprList expr_list;
    /* Set to 1 by FstringParser_ConcatFstring(); while it is 0,
       FstringParser_Finish() returns a plain string Constant. */
    int fmode;
} FstringParser;
5379 
5380 #ifdef NDEBUG
5381 #define FstringParser_check_invariants(state)
5382 #else
5383 static void
FstringParser_check_invariants(FstringParser * state)5384 FstringParser_check_invariants(FstringParser *state)
5385 {
5386     if (state->last_str)
5387         assert(PyUnicode_CheckExact(state->last_str));
5388     ExprList_check_invariants(&state->expr_list);
5389 }
5390 #endif
5391 
5392 static void
FstringParser_Init(FstringParser * state)5393 FstringParser_Init(FstringParser *state)
5394 {
5395     state->last_str = NULL;
5396     state->fmode = 0;
5397     ExprList_Init(&state->expr_list);
5398     FstringParser_check_invariants(state);
5399 }
5400 
5401 static void
FstringParser_Dealloc(FstringParser * state)5402 FstringParser_Dealloc(FstringParser *state)
5403 {
5404     FstringParser_check_invariants(state);
5405 
5406     Py_XDECREF(state->last_str);
5407     ExprList_Dealloc(&state->expr_list);
5408 }
5409 
5410 /* Constants for the following */
5411 static PyObject *u_kind;
5412 
5413 /* Compute 'kind' field for string Constant (either 'u' or None) */
5414 static PyObject *
make_kind(struct compiling * c,const node * n)5415 make_kind(struct compiling *c, const node *n)
5416 {
5417     char *s = NULL;
5418     PyObject *kind = NULL;
5419 
5420     /* Find the first string literal, if any */
5421     while (TYPE(n) != STRING) {
5422         if (NCH(n) == 0)
5423             return NULL;
5424         n = CHILD(n, 0);
5425     }
5426     REQ(n, STRING);
5427 
5428     /* If it starts with 'u', return a PyUnicode "u" string */
5429     s = STR(n);
5430     if (s && *s == 'u') {
5431         if (!u_kind) {
5432             u_kind = PyUnicode_InternFromString("u");
5433             if (!u_kind)
5434                 return NULL;
5435         }
5436         kind = u_kind;
5437         if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5438             return NULL;
5439         }
5440         Py_INCREF(kind);
5441     }
5442     return kind;
5443 }
5444 
5445 /* Make a Constant node, but decref the PyUnicode object being added. */
5446 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5447 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5448 {
5449     PyObject *s = *str;
5450     PyObject *kind = NULL;
5451     *str = NULL;
5452     assert(PyUnicode_CheckExact(s));
5453     if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5454         Py_DECREF(s);
5455         return NULL;
5456     }
5457     kind = make_kind(c, n);
5458     if (kind == NULL && PyErr_Occurred())
5459         return NULL;
5460     return Constant(s, kind, LINENO(n), n->n_col_offset,
5461                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5462 }
5463 
5464 /* Add a non-f-string (that is, a regular literal string). str is
5465    decref'd. */
5466 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5467 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5468 {
5469     FstringParser_check_invariants(state);
5470 
5471     assert(PyUnicode_CheckExact(str));
5472 
5473     if (PyUnicode_GET_LENGTH(str) == 0) {
5474         Py_DECREF(str);
5475         return 0;
5476     }
5477 
5478     if (!state->last_str) {
5479         /* We didn't have a string before, so just remember this one. */
5480         state->last_str = str;
5481     } else {
5482         /* Concatenate this with the previous string. */
5483         PyUnicode_AppendAndDel(&state->last_str, str);
5484         if (!state->last_str)
5485             return -1;
5486     }
5487     FstringParser_check_invariants(state);
5488     return 0;
5489 }
5490 
5491 /* Parse an f-string. The f-string is in *str to end, with no
5492    'f' or quotes. */
5493 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5494 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5495                             const char *end, int raw, int recurse_lvl,
5496                             struct compiling *c, const node *n)
5497 {
5498     FstringParser_check_invariants(state);
5499     state->fmode = 1;
5500 
5501     /* Parse the f-string. */
5502     while (1) {
5503         PyObject *literal = NULL;
5504         PyObject *expr_text = NULL;
5505         expr_ty expression = NULL;
5506 
5507         /* If there's a zero length literal in front of the
5508            expression, literal will be NULL. If we're at the end of
5509            the f-string, expression will be NULL (unless result == 1,
5510            see below). */
5511         int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5512                                                    &literal, &expr_text,
5513                                                    &expression, c, n);
5514         if (result < 0)
5515             return -1;
5516 
5517         /* Add the literal, if any. */
5518         if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5519             Py_XDECREF(expr_text);
5520             return -1;
5521         }
5522         /* Add the expr_text, if any. */
5523         if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5524             return -1;
5525         }
5526 
5527         /* We've dealt with the literal and expr_text, their ownership has
5528            been transferred to the state object.  Don't look at them again. */
5529 
5530         /* See if we should just loop around to get the next literal
5531            and expression, while ignoring the expression this
5532            time. This is used for un-doubling braces, as an
5533            optimization. */
5534         if (result == 1)
5535             continue;
5536 
5537         if (!expression)
5538             /* We're done with this f-string. */
5539             break;
5540 
5541         /* We know we have an expression. Convert any existing string
5542            to a Constant node. */
5543         if (!state->last_str) {
5544             /* Do nothing. No previous literal. */
5545         } else {
5546             /* Convert the existing last_str literal to a Constant node. */
5547             expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5548             if (!str || ExprList_Append(&state->expr_list, str) < 0)
5549                 return -1;
5550         }
5551 
5552         if (ExprList_Append(&state->expr_list, expression) < 0)
5553             return -1;
5554     }
5555 
5556     /* If recurse_lvl is zero, then we must be at the end of the
5557        string. Otherwise, we must be at a right brace. */
5558 
5559     if (recurse_lvl == 0 && *str < end-1) {
5560         ast_error(c, n, "f-string: unexpected end of string");
5561         return -1;
5562     }
5563     if (recurse_lvl != 0 && **str != '}') {
5564         ast_error(c, n, "f-string: expecting '}'");
5565         return -1;
5566     }
5567 
5568     FstringParser_check_invariants(state);
5569     return 0;
5570 }
5571 
5572 /* Convert the partial state reflected in last_str and expr_list to an
5573    expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5574 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5575 FstringParser_Finish(FstringParser *state, struct compiling *c,
5576                      const node *n)
5577 {
5578     asdl_seq *seq;
5579 
5580     FstringParser_check_invariants(state);
5581 
5582     /* If we're just a constant string with no expressions, return
5583        that. */
5584     if (!state->fmode) {
5585         assert(!state->expr_list.size);
5586         if (!state->last_str) {
5587             /* Create a zero length string. */
5588             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5589             if (!state->last_str)
5590                 goto error;
5591         }
5592         return make_str_node_and_del(&state->last_str, c, n);
5593     }
5594 
5595     /* Create a Constant node out of last_str, if needed. It will be the
5596        last node in our expression list. */
5597     if (state->last_str) {
5598         expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5599         if (!str || ExprList_Append(&state->expr_list, str) < 0)
5600             goto error;
5601     }
5602     /* This has already been freed. */
5603     assert(state->last_str == NULL);
5604 
5605     seq = ExprList_Finish(&state->expr_list, c->c_arena);
5606     if (!seq)
5607         goto error;
5608 
5609     return JoinedStr(seq, LINENO(n), n->n_col_offset,
5610                      n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5611 
5612 error:
5613     FstringParser_Dealloc(state);
5614     return NULL;
5615 }
5616 
5617 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5618    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
5619    str to point past the parsed portion. */
5620 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5621 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5622               struct compiling *c, const node *n)
5623 {
5624     FstringParser state;
5625 
5626     FstringParser_Init(&state);
5627     if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5628                                     c, n) < 0) {
5629         FstringParser_Dealloc(&state);
5630         return NULL;
5631     }
5632 
5633     return FstringParser_Finish(&state, c, n);
5634 }
5635 
5636 /* n is a Python string literal, including the bracketing quote
5637    characters, and r, b, u, &/or f prefixes (if any), and embedded
5638    escape sequences (if any). parsestr parses it, and sets *result to
5639    decoded Python string object.  If the string is an f-string, set
5640    *fstr and *fstrlen to the unparsed string object.  Return 0 if no
5641    errors occurred.
5642 */
5643 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5644 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5645          PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5646 {
5647     size_t len;
5648     const char *s = STR(n);
5649     int quote = Py_CHARMASK(*s);
5650     int fmode = 0;
5651     *bytesmode = 0;
5652     *rawmode = 0;
5653     *result = NULL;
5654     *fstr = NULL;
5655     if (Py_ISALPHA(quote)) {
5656         while (!*bytesmode || !*rawmode) {
5657             if (quote == 'b' || quote == 'B') {
5658                 quote = *++s;
5659                 *bytesmode = 1;
5660             }
5661             else if (quote == 'u' || quote == 'U') {
5662                 quote = *++s;
5663             }
5664             else if (quote == 'r' || quote == 'R') {
5665                 quote = *++s;
5666                 *rawmode = 1;
5667             }
5668             else if (quote == 'f' || quote == 'F') {
5669                 quote = *++s;
5670                 fmode = 1;
5671             }
5672             else {
5673                 break;
5674             }
5675         }
5676     }
5677 
5678     /* fstrings are only allowed in Python 3.6 and greater */
5679     if (fmode && c->c_feature_version < 6) {
5680         ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5681         return -1;
5682     }
5683 
5684     if (fmode && *bytesmode) {
5685         PyErr_BadInternalCall();
5686         return -1;
5687     }
5688     if (quote != '\'' && quote != '\"') {
5689         PyErr_BadInternalCall();
5690         return -1;
5691     }
5692     /* Skip the leading quote char. */
5693     s++;
5694     len = strlen(s);
5695     if (len > INT_MAX) {
5696         PyErr_SetString(PyExc_OverflowError,
5697                         "string to parse is too long");
5698         return -1;
5699     }
5700     if (s[--len] != quote) {
5701         /* Last quote char must match the first. */
5702         PyErr_BadInternalCall();
5703         return -1;
5704     }
5705     if (len >= 4 && s[0] == quote && s[1] == quote) {
5706         /* A triple quoted string. We've already skipped one quote at
5707            the start and one at the end of the string. Now skip the
5708            two at the start. */
5709         s += 2;
5710         len -= 2;
5711         /* And check that the last two match. */
5712         if (s[--len] != quote || s[--len] != quote) {
5713             PyErr_BadInternalCall();
5714             return -1;
5715         }
5716     }
5717 
5718     if (fmode) {
5719         /* Just return the bytes. The caller will parse the resulting
5720            string. */
5721         *fstr = s;
5722         *fstrlen = len;
5723         return 0;
5724     }
5725 
5726     /* Not an f-string. */
5727     /* Avoid invoking escape decoding routines if possible. */
5728     *rawmode = *rawmode || strchr(s, '\\') == NULL;
5729     if (*bytesmode) {
5730         /* Disallow non-ASCII characters. */
5731         const char *ch;
5732         for (ch = s; *ch; ch++) {
5733             if (Py_CHARMASK(*ch) >= 0x80) {
5734                 ast_error(c, n,
5735                           "bytes can only contain ASCII "
5736                           "literal characters.");
5737                 return -1;
5738             }
5739         }
5740         if (*rawmode)
5741             *result = PyBytes_FromStringAndSize(s, len);
5742         else
5743             *result = decode_bytes_with_escapes(c, n, s, len);
5744     } else {
5745         if (*rawmode)
5746             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5747         else
5748             *result = decode_unicode_with_escapes(c, n, s, len);
5749     }
5750     return *result == NULL ? -1 : 0;
5751 }
5752 
5753 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5754    each STRING atom, and process it as needed. For bytes, just
5755    concatenate them together, and the result will be a Constant node. For
5756    normal strings and f-strings, concatenate them together. The result
5757    will be a Constant node if there were no f-strings; a FormattedValue
5758    node if there's just an f-string (with no leading or trailing
5759    literals), or a JoinedStr node if there are multiple f-strings or
5760    any literals involved. */
5761 static expr_ty
parsestrplus(struct compiling * c,const node * n)5762 parsestrplus(struct compiling *c, const node *n)
5763 {
5764     int bytesmode = 0;
5765     PyObject *bytes_str = NULL;
5766     int i;
5767 
5768     FstringParser state;
5769     FstringParser_Init(&state);
5770 
5771     for (i = 0; i < NCH(n); i++) {
5772         int this_bytesmode;
5773         int this_rawmode;
5774         PyObject *s;
5775         const char *fstr;
5776         Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
5777 
5778         REQ(CHILD(n, i), STRING);
5779         if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5780                      &fstr, &fstrlen) != 0)
5781             goto error;
5782 
5783         /* Check that we're not mixing bytes with unicode. */
5784         if (i != 0 && bytesmode != this_bytesmode) {
5785             ast_error(c, n, "cannot mix bytes and nonbytes literals");
5786             /* s is NULL if the current string part is an f-string. */
5787             Py_XDECREF(s);
5788             goto error;
5789         }
5790         bytesmode = this_bytesmode;
5791 
5792         if (fstr != NULL) {
5793             int result;
5794             assert(s == NULL && !bytesmode);
5795             /* This is an f-string. Parse and concatenate it. */
5796             result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5797                                                  this_rawmode, 0, c, n);
5798             if (result < 0)
5799                 goto error;
5800         } else {
5801             /* A string or byte string. */
5802             assert(s != NULL && fstr == NULL);
5803 
5804             assert(bytesmode ? PyBytes_CheckExact(s) :
5805                    PyUnicode_CheckExact(s));
5806 
5807             if (bytesmode) {
5808                 /* For bytes, concat as we go. */
5809                 if (i == 0) {
5810                     /* First time, just remember this value. */
5811                     bytes_str = s;
5812                 } else {
5813                     PyBytes_ConcatAndDel(&bytes_str, s);
5814                     if (!bytes_str)
5815                         goto error;
5816                 }
5817             } else {
5818                 /* This is a regular string. Concatenate it. */
5819                 if (FstringParser_ConcatAndDel(&state, s) < 0)
5820                     goto error;
5821             }
5822         }
5823     }
5824     if (bytesmode) {
5825         /* Just return the bytes object and we're done. */
5826         if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5827             goto error;
5828         return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5829                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5830     }
5831 
5832     /* We're not a bytes string, bytes_str should never have been set. */
5833     assert(bytes_str == NULL);
5834 
5835     return FstringParser_Finish(&state, c, n);
5836 
5837 error:
5838     Py_XDECREF(bytes_str);
5839     FstringParser_Dealloc(&state);
5840     return NULL;
5841 }
5842 
5843 PyObject *
_PyAST_GetDocString(asdl_seq * body)5844 _PyAST_GetDocString(asdl_seq *body)
5845 {
5846     if (!asdl_seq_LEN(body)) {
5847         return NULL;
5848     }
5849     stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5850     if (st->kind != Expr_kind) {
5851         return NULL;
5852     }
5853     expr_ty e = st->v.Expr.value;
5854     if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5855         return e->v.Constant.value;
5856     }
5857     return NULL;
5858 }
5859