1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12
13 #include <assert.h>
14 #include <stdbool.h>
15
#define MAXLEVEL 200    /* Max parentheses level */

/* Forward declarations for the AST validators.  Several of them call each
   other (e.g. validate_expr() <-> validate_exprs()), so the prototypes must
   precede the definitions. */
static int validate_stmts(asdl_seq *);
static int validate_exprs(asdl_seq *, expr_context_ty, int);
static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
static int validate_stmt(stmt_ty);
static int validate_expr(expr_ty, expr_context_ty);
23
24 static int
validate_name(PyObject * name)25 validate_name(PyObject *name)
26 {
27 assert(PyUnicode_Check(name));
28 static const char * const forbidden[] = {
29 "None",
30 "True",
31 "False",
32 NULL
33 };
34 for (int i = 0; forbidden[i] != NULL; i++) {
35 if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
36 PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
37 return 0;
38 }
39 }
40 return 1;
41 }
42
43 static int
validate_comprehension(asdl_seq * gens)44 validate_comprehension(asdl_seq *gens)
45 {
46 Py_ssize_t i;
47 if (!asdl_seq_LEN(gens)) {
48 PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
49 return 0;
50 }
51 for (i = 0; i < asdl_seq_LEN(gens); i++) {
52 comprehension_ty comp = asdl_seq_GET(gens, i);
53 if (!validate_expr(comp->target, Store) ||
54 !validate_expr(comp->iter, Load) ||
55 !validate_exprs(comp->ifs, Load, 0))
56 return 0;
57 }
58 return 1;
59 }
60
61 static int
validate_slice(slice_ty slice)62 validate_slice(slice_ty slice)
63 {
64 switch (slice->kind) {
65 case Slice_kind:
66 return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
67 (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
68 (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
69 case ExtSlice_kind: {
70 Py_ssize_t i;
71 if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
72 return 0;
73 for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
74 if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
75 return 0;
76 return 1;
77 }
78 case Index_kind:
79 return validate_expr(slice->v.Index.value, Load);
80 default:
81 PyErr_SetString(PyExc_SystemError, "unknown slice node");
82 return 0;
83 }
84 }
85
86 static int
validate_keywords(asdl_seq * keywords)87 validate_keywords(asdl_seq *keywords)
88 {
89 Py_ssize_t i;
90 for (i = 0; i < asdl_seq_LEN(keywords); i++)
91 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
92 return 0;
93 return 1;
94 }
95
96 static int
validate_args(asdl_seq * args)97 validate_args(asdl_seq *args)
98 {
99 Py_ssize_t i;
100 for (i = 0; i < asdl_seq_LEN(args); i++) {
101 arg_ty arg = asdl_seq_GET(args, i);
102 if (arg->annotation && !validate_expr(arg->annotation, Load))
103 return 0;
104 }
105 return 1;
106 }
107
108 static const char *
expr_context_name(expr_context_ty ctx)109 expr_context_name(expr_context_ty ctx)
110 {
111 switch (ctx) {
112 case Load:
113 return "Load";
114 case Store:
115 return "Store";
116 case Del:
117 return "Del";
118 case AugLoad:
119 return "AugLoad";
120 case AugStore:
121 return "AugStore";
122 case Param:
123 return "Param";
124 default:
125 Py_UNREACHABLE();
126 }
127 }
128
129 static int
validate_arguments(arguments_ty args)130 validate_arguments(arguments_ty args)
131 {
132 if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
133 return 0;
134 }
135 if (args->vararg && args->vararg->annotation
136 && !validate_expr(args->vararg->annotation, Load)) {
137 return 0;
138 }
139 if (!validate_args(args->kwonlyargs))
140 return 0;
141 if (args->kwarg && args->kwarg->annotation
142 && !validate_expr(args->kwarg->annotation, Load)) {
143 return 0;
144 }
145 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
146 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
147 return 0;
148 }
149 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
150 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
151 "kw_defaults on arguments");
152 return 0;
153 }
154 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
155 }
156
157 static int
validate_constant(PyObject * value)158 validate_constant(PyObject *value)
159 {
160 if (value == Py_None || value == Py_Ellipsis)
161 return 1;
162
163 if (PyLong_CheckExact(value)
164 || PyFloat_CheckExact(value)
165 || PyComplex_CheckExact(value)
166 || PyBool_Check(value)
167 || PyUnicode_CheckExact(value)
168 || PyBytes_CheckExact(value))
169 return 1;
170
171 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
172 PyObject *it;
173
174 it = PyObject_GetIter(value);
175 if (it == NULL)
176 return 0;
177
178 while (1) {
179 PyObject *item = PyIter_Next(it);
180 if (item == NULL) {
181 if (PyErr_Occurred()) {
182 Py_DECREF(it);
183 return 0;
184 }
185 break;
186 }
187
188 if (!validate_constant(item)) {
189 Py_DECREF(it);
190 Py_DECREF(item);
191 return 0;
192 }
193 Py_DECREF(item);
194 }
195
196 Py_DECREF(it);
197 return 1;
198 }
199
200 return 0;
201 }
202
203 static int
validate_expr(expr_ty exp,expr_context_ty ctx)204 validate_expr(expr_ty exp, expr_context_ty ctx)
205 {
206 int check_ctx = 1;
207 expr_context_ty actual_ctx;
208
209 /* First check expression context. */
210 switch (exp->kind) {
211 case Attribute_kind:
212 actual_ctx = exp->v.Attribute.ctx;
213 break;
214 case Subscript_kind:
215 actual_ctx = exp->v.Subscript.ctx;
216 break;
217 case Starred_kind:
218 actual_ctx = exp->v.Starred.ctx;
219 break;
220 case Name_kind:
221 if (!validate_name(exp->v.Name.id)) {
222 return 0;
223 }
224 actual_ctx = exp->v.Name.ctx;
225 break;
226 case List_kind:
227 actual_ctx = exp->v.List.ctx;
228 break;
229 case Tuple_kind:
230 actual_ctx = exp->v.Tuple.ctx;
231 break;
232 default:
233 if (ctx != Load) {
234 PyErr_Format(PyExc_ValueError, "expression which can't be "
235 "assigned to in %s context", expr_context_name(ctx));
236 return 0;
237 }
238 check_ctx = 0;
239 /* set actual_ctx to prevent gcc warning */
240 actual_ctx = 0;
241 }
242 if (check_ctx && actual_ctx != ctx) {
243 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
244 expr_context_name(ctx), expr_context_name(actual_ctx));
245 return 0;
246 }
247
248 /* Now validate expression. */
249 switch (exp->kind) {
250 case BoolOp_kind:
251 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
252 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
253 return 0;
254 }
255 return validate_exprs(exp->v.BoolOp.values, Load, 0);
256 case BinOp_kind:
257 return validate_expr(exp->v.BinOp.left, Load) &&
258 validate_expr(exp->v.BinOp.right, Load);
259 case UnaryOp_kind:
260 return validate_expr(exp->v.UnaryOp.operand, Load);
261 case Lambda_kind:
262 return validate_arguments(exp->v.Lambda.args) &&
263 validate_expr(exp->v.Lambda.body, Load);
264 case IfExp_kind:
265 return validate_expr(exp->v.IfExp.test, Load) &&
266 validate_expr(exp->v.IfExp.body, Load) &&
267 validate_expr(exp->v.IfExp.orelse, Load);
268 case Dict_kind:
269 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
270 PyErr_SetString(PyExc_ValueError,
271 "Dict doesn't have the same number of keys as values");
272 return 0;
273 }
274 /* null_ok=1 for keys expressions to allow dict unpacking to work in
275 dict literals, i.e. ``{**{a:b}}`` */
276 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
277 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
278 case Set_kind:
279 return validate_exprs(exp->v.Set.elts, Load, 0);
280 #define COMP(NAME) \
281 case NAME ## _kind: \
282 return validate_comprehension(exp->v.NAME.generators) && \
283 validate_expr(exp->v.NAME.elt, Load);
284 COMP(ListComp)
285 COMP(SetComp)
286 COMP(GeneratorExp)
287 #undef COMP
288 case DictComp_kind:
289 return validate_comprehension(exp->v.DictComp.generators) &&
290 validate_expr(exp->v.DictComp.key, Load) &&
291 validate_expr(exp->v.DictComp.value, Load);
292 case Yield_kind:
293 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
294 case YieldFrom_kind:
295 return validate_expr(exp->v.YieldFrom.value, Load);
296 case Await_kind:
297 return validate_expr(exp->v.Await.value, Load);
298 case Compare_kind:
299 if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
300 PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
301 return 0;
302 }
303 if (asdl_seq_LEN(exp->v.Compare.comparators) !=
304 asdl_seq_LEN(exp->v.Compare.ops)) {
305 PyErr_SetString(PyExc_ValueError, "Compare has a different number "
306 "of comparators and operands");
307 return 0;
308 }
309 return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
310 validate_expr(exp->v.Compare.left, Load);
311 case Call_kind:
312 return validate_expr(exp->v.Call.func, Load) &&
313 validate_exprs(exp->v.Call.args, Load, 0) &&
314 validate_keywords(exp->v.Call.keywords);
315 case Constant_kind:
316 if (!validate_constant(exp->v.Constant.value)) {
317 PyErr_Format(PyExc_TypeError,
318 "got an invalid type in Constant: %s",
319 Py_TYPE(exp->v.Constant.value)->tp_name);
320 return 0;
321 }
322 return 1;
323 case JoinedStr_kind:
324 return validate_exprs(exp->v.JoinedStr.values, Load, 0);
325 case FormattedValue_kind:
326 if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
327 return 0;
328 if (exp->v.FormattedValue.format_spec)
329 return validate_expr(exp->v.FormattedValue.format_spec, Load);
330 return 1;
331 case Attribute_kind:
332 return validate_expr(exp->v.Attribute.value, Load);
333 case Subscript_kind:
334 return validate_slice(exp->v.Subscript.slice) &&
335 validate_expr(exp->v.Subscript.value, Load);
336 case Starred_kind:
337 return validate_expr(exp->v.Starred.value, ctx);
338 case List_kind:
339 return validate_exprs(exp->v.List.elts, ctx, 0);
340 case Tuple_kind:
341 return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342 case NamedExpr_kind:
343 return validate_expr(exp->v.NamedExpr.value, Load);
344 /* This last case doesn't have any checking. */
345 case Name_kind:
346 return 1;
347 }
348 PyErr_SetString(PyExc_SystemError, "unexpected expression");
349 return 0;
350 }
351
352 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)353 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
354 {
355 if (asdl_seq_LEN(seq))
356 return 1;
357 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
358 return 0;
359 }
360
361 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)362 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
363 {
364 return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
365 validate_exprs(targets, ctx, 0);
366 }
367
368 static int
validate_body(asdl_seq * body,const char * owner)369 validate_body(asdl_seq *body, const char *owner)
370 {
371 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
372 }
373
374 static int
validate_stmt(stmt_ty stmt)375 validate_stmt(stmt_ty stmt)
376 {
377 Py_ssize_t i;
378 switch (stmt->kind) {
379 case FunctionDef_kind:
380 return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
381 validate_arguments(stmt->v.FunctionDef.args) &&
382 validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
383 (!stmt->v.FunctionDef.returns ||
384 validate_expr(stmt->v.FunctionDef.returns, Load));
385 case ClassDef_kind:
386 return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
387 validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
388 validate_keywords(stmt->v.ClassDef.keywords) &&
389 validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
390 case Return_kind:
391 return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
392 case Delete_kind:
393 return validate_assignlist(stmt->v.Delete.targets, Del);
394 case Assign_kind:
395 return validate_assignlist(stmt->v.Assign.targets, Store) &&
396 validate_expr(stmt->v.Assign.value, Load);
397 case AugAssign_kind:
398 return validate_expr(stmt->v.AugAssign.target, Store) &&
399 validate_expr(stmt->v.AugAssign.value, Load);
400 case AnnAssign_kind:
401 if (stmt->v.AnnAssign.target->kind != Name_kind &&
402 stmt->v.AnnAssign.simple) {
403 PyErr_SetString(PyExc_TypeError,
404 "AnnAssign with simple non-Name target");
405 return 0;
406 }
407 return validate_expr(stmt->v.AnnAssign.target, Store) &&
408 (!stmt->v.AnnAssign.value ||
409 validate_expr(stmt->v.AnnAssign.value, Load)) &&
410 validate_expr(stmt->v.AnnAssign.annotation, Load);
411 case For_kind:
412 return validate_expr(stmt->v.For.target, Store) &&
413 validate_expr(stmt->v.For.iter, Load) &&
414 validate_body(stmt->v.For.body, "For") &&
415 validate_stmts(stmt->v.For.orelse);
416 case AsyncFor_kind:
417 return validate_expr(stmt->v.AsyncFor.target, Store) &&
418 validate_expr(stmt->v.AsyncFor.iter, Load) &&
419 validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
420 validate_stmts(stmt->v.AsyncFor.orelse);
421 case While_kind:
422 return validate_expr(stmt->v.While.test, Load) &&
423 validate_body(stmt->v.While.body, "While") &&
424 validate_stmts(stmt->v.While.orelse);
425 case If_kind:
426 return validate_expr(stmt->v.If.test, Load) &&
427 validate_body(stmt->v.If.body, "If") &&
428 validate_stmts(stmt->v.If.orelse);
429 case With_kind:
430 if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
431 return 0;
432 for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
433 withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
434 if (!validate_expr(item->context_expr, Load) ||
435 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
436 return 0;
437 }
438 return validate_body(stmt->v.With.body, "With");
439 case AsyncWith_kind:
440 if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
441 return 0;
442 for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
443 withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
444 if (!validate_expr(item->context_expr, Load) ||
445 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
446 return 0;
447 }
448 return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
449 case Raise_kind:
450 if (stmt->v.Raise.exc) {
451 return validate_expr(stmt->v.Raise.exc, Load) &&
452 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
453 }
454 if (stmt->v.Raise.cause) {
455 PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
456 return 0;
457 }
458 return 1;
459 case Try_kind:
460 if (!validate_body(stmt->v.Try.body, "Try"))
461 return 0;
462 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
463 !asdl_seq_LEN(stmt->v.Try.finalbody)) {
464 PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
465 return 0;
466 }
467 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
468 asdl_seq_LEN(stmt->v.Try.orelse)) {
469 PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
470 return 0;
471 }
472 for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
473 excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
474 if ((handler->v.ExceptHandler.type &&
475 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
476 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
477 return 0;
478 }
479 return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
480 validate_stmts(stmt->v.Try.finalbody)) &&
481 (!asdl_seq_LEN(stmt->v.Try.orelse) ||
482 validate_stmts(stmt->v.Try.orelse));
483 case Assert_kind:
484 return validate_expr(stmt->v.Assert.test, Load) &&
485 (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
486 case Import_kind:
487 return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
488 case ImportFrom_kind:
489 if (stmt->v.ImportFrom.level < 0) {
490 PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
491 return 0;
492 }
493 return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
494 case Global_kind:
495 return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
496 case Nonlocal_kind:
497 return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
498 case Expr_kind:
499 return validate_expr(stmt->v.Expr.value, Load);
500 case AsyncFunctionDef_kind:
501 return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
502 validate_arguments(stmt->v.AsyncFunctionDef.args) &&
503 validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
504 (!stmt->v.AsyncFunctionDef.returns ||
505 validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
506 case Pass_kind:
507 case Break_kind:
508 case Continue_kind:
509 return 1;
510 default:
511 PyErr_SetString(PyExc_SystemError, "unexpected statement");
512 return 0;
513 }
514 }
515
516 static int
validate_stmts(asdl_seq * seq)517 validate_stmts(asdl_seq *seq)
518 {
519 Py_ssize_t i;
520 for (i = 0; i < asdl_seq_LEN(seq); i++) {
521 stmt_ty stmt = asdl_seq_GET(seq, i);
522 if (stmt) {
523 if (!validate_stmt(stmt))
524 return 0;
525 }
526 else {
527 PyErr_SetString(PyExc_ValueError,
528 "None disallowed in statement list");
529 return 0;
530 }
531 }
532 return 1;
533 }
534
535 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)536 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
537 {
538 Py_ssize_t i;
539 for (i = 0; i < asdl_seq_LEN(exprs); i++) {
540 expr_ty expr = asdl_seq_GET(exprs, i);
541 if (expr) {
542 if (!validate_expr(expr, ctx))
543 return 0;
544 }
545 else if (!null_ok) {
546 PyErr_SetString(PyExc_ValueError,
547 "None disallowed in expression list");
548 return 0;
549 }
550
551 }
552 return 1;
553 }
554
555 int
PyAST_Validate(mod_ty mod)556 PyAST_Validate(mod_ty mod)
557 {
558 int res = 0;
559
560 switch (mod->kind) {
561 case Module_kind:
562 res = validate_stmts(mod->v.Module.body);
563 break;
564 case Interactive_kind:
565 res = validate_stmts(mod->v.Interactive.body);
566 break;
567 case Expression_kind:
568 res = validate_expr(mod->v.Expression.body, Load);
569 break;
570 case Suite_kind:
571 PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
572 break;
573 default:
574 PyErr_SetString(PyExc_SystemError, "impossible module node");
575 res = 0;
576 break;
577 }
578 return res;
579 }
580
581 /* This is done here, so defines like "test" don't interfere with AST use above. */
582 #include "grammar.h"
583 #include "parsetok.h"
584 #include "graminit.h"
585
/* Data structure used internally: the state handed to the CST-to-AST
   helpers below.  PyAST_FromNodeObject() fills it in; c_filename is stored
   there as a borrowed reference, and c_normalize starts out NULL, is set on
   demand by init_normalization() and is released when the conversion
   finishes. */
struct compiling {
    PyArena *c_arena; /* Arena for allocating memory. */
    PyObject *c_filename; /* filename */
    PyObject *c_normalize; /* Normalization function from unicodedata. */
    int c_feature_version; /* Latest minor version of Python for allowed features */
};
593
/* Forward declarations for the CST-to-AST conversion helpers.  Their
   definitions are not all visible before their first use, so they are
   declared up front. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);

static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);

/* Note different signature for ast_for_call */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
                            const node *, const node *, const node *);

static PyObject *parsenumber(struct compiling *, const char *);
static expr_ty parsestrplus(struct compiling *, const node *n);
static void get_last_end_pos(asdl_seq *, int *, int *);

/* Comprehension kind selectors.  NOTE(review): their users are outside this
   part of the file; presumably they pick which comprehension node to
   build -- confirm against the callers. */
#define COMP_GENEXP   0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP  2
617
618 static int
init_normalization(struct compiling * c)619 init_normalization(struct compiling *c)
620 {
621 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
622 if (!m)
623 return 0;
624 c->c_normalize = PyObject_GetAttrString(m, "normalize");
625 Py_DECREF(m);
626 if (!c->c_normalize)
627 return 0;
628 return 1;
629 }
630
631 static identifier
new_identifier(const char * n,struct compiling * c)632 new_identifier(const char *n, struct compiling *c)
633 {
634 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
635 if (!id)
636 return NULL;
637 /* PyUnicode_DecodeUTF8 should always return a ready string. */
638 assert(PyUnicode_IS_READY(id));
639 /* Check whether there are non-ASCII characters in the
640 identifier; if so, normalize to NFKC. */
641 if (!PyUnicode_IS_ASCII(id)) {
642 PyObject *id2;
643 _Py_IDENTIFIER(NFKC);
644 if (!c->c_normalize && !init_normalization(c)) {
645 Py_DECREF(id);
646 return NULL;
647 }
648 PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
649 if (form == NULL) {
650 Py_DECREF(id);
651 return NULL;
652 }
653 PyObject *args[2] = {form, id};
654 id2 = _PyObject_FastCall(c->c_normalize, args, 2);
655 Py_DECREF(id);
656 if (!id2)
657 return NULL;
658 if (!PyUnicode_Check(id2)) {
659 PyErr_Format(PyExc_TypeError,
660 "unicodedata.normalize() must return a string, not "
661 "%.200s",
662 Py_TYPE(id2)->tp_name);
663 Py_DECREF(id2);
664 return NULL;
665 }
666 id = id2;
667 }
668 PyUnicode_InternInPlace(&id);
669 if (PyArena_AddPyObject(c->c_arena, id) < 0) {
670 Py_DECREF(id);
671 return NULL;
672 }
673 return id;
674 }
675
676 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
677
678 static int
ast_error(struct compiling * c,const node * n,const char * errmsg,...)679 ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
680 {
681 PyObject *value, *errstr, *loc, *tmp;
682 va_list va;
683
684 va_start(va, errmsg);
685 errstr = PyUnicode_FromFormatV(errmsg, va);
686 va_end(va);
687 if (!errstr) {
688 return 0;
689 }
690 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
691 if (!loc) {
692 Py_INCREF(Py_None);
693 loc = Py_None;
694 }
695 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
696 if (!tmp) {
697 Py_DECREF(errstr);
698 return 0;
699 }
700 value = PyTuple_Pack(2, errstr, tmp);
701 Py_DECREF(errstr);
702 Py_DECREF(tmp);
703 if (value) {
704 PyErr_SetObject(PyExc_SyntaxError, value);
705 Py_DECREF(value);
706 }
707 return 0;
708 }
709
/* num_stmts() returns the number of contained statements.
   (The function itself is defined further down, after new_type_comment().)

   Use this routine to determine how big a sequence is needed for
   the statements in a parse tree.  Its raison d'etre is this bit of
   grammar:

   stmt: simple_stmt | compound_stmt
   simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE

   A simple_stmt can contain multiple small_stmt elements joined
   by semicolons.  If the arg is a simple_stmt, the number of
   small_stmt elements is returned.
*/
723
724 static string
new_type_comment(const char * s,struct compiling * c)725 new_type_comment(const char *s, struct compiling *c)
726 {
727 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
728 if (res == NULL)
729 return NULL;
730 if (PyArena_AddPyObject(c->c_arena, res) < 0) {
731 Py_DECREF(res);
732 return NULL;
733 }
734 return res;
735 }
736 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
737
738 static int
num_stmts(const node * n)739 num_stmts(const node *n)
740 {
741 int i, l;
742 node *ch;
743
744 switch (TYPE(n)) {
745 case single_input:
746 if (TYPE(CHILD(n, 0)) == NEWLINE)
747 return 0;
748 else
749 return num_stmts(CHILD(n, 0));
750 case file_input:
751 l = 0;
752 for (i = 0; i < NCH(n); i++) {
753 ch = CHILD(n, i);
754 if (TYPE(ch) == stmt)
755 l += num_stmts(ch);
756 }
757 return l;
758 case stmt:
759 return num_stmts(CHILD(n, 0));
760 case compound_stmt:
761 return 1;
762 case simple_stmt:
763 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
764 case suite:
765 case func_body_suite:
766 /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
767 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
768 if (NCH(n) == 1)
769 return num_stmts(CHILD(n, 0));
770 else {
771 i = 2;
772 l = 0;
773 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
774 i += 2;
775 for (; i < (NCH(n) - 1); i++)
776 l += num_stmts(CHILD(n, i));
777 return l;
778 }
779 default: {
780 char buf[128];
781
782 sprintf(buf, "Non-statement found: %d %d",
783 TYPE(n), NCH(n));
784 Py_FatalError(buf);
785 }
786 }
787 Py_UNREACHABLE();
788 }
789
790 /* Transform the CST rooted at node * to the appropriate AST
791 */
792
793 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)794 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
795 PyObject *filename, PyArena *arena)
796 {
797 int i, j, k, num;
798 asdl_seq *stmts = NULL;
799 asdl_seq *type_ignores = NULL;
800 stmt_ty s;
801 node *ch;
802 struct compiling c;
803 mod_ty res = NULL;
804 asdl_seq *argtypes = NULL;
805 expr_ty ret, arg;
806
807 c.c_arena = arena;
808 /* borrowed reference */
809 c.c_filename = filename;
810 c.c_normalize = NULL;
811 c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
812 flags->cf_feature_version : PY_MINOR_VERSION;
813
814 if (TYPE(n) == encoding_decl)
815 n = CHILD(n, 0);
816
817 k = 0;
818 switch (TYPE(n)) {
819 case file_input:
820 stmts = _Py_asdl_seq_new(num_stmts(n), arena);
821 if (!stmts)
822 goto out;
823 for (i = 0; i < NCH(n) - 1; i++) {
824 ch = CHILD(n, i);
825 if (TYPE(ch) == NEWLINE)
826 continue;
827 REQ(ch, stmt);
828 num = num_stmts(ch);
829 if (num == 1) {
830 s = ast_for_stmt(&c, ch);
831 if (!s)
832 goto out;
833 asdl_seq_SET(stmts, k++, s);
834 }
835 else {
836 ch = CHILD(ch, 0);
837 REQ(ch, simple_stmt);
838 for (j = 0; j < num; j++) {
839 s = ast_for_stmt(&c, CHILD(ch, j * 2));
840 if (!s)
841 goto out;
842 asdl_seq_SET(stmts, k++, s);
843 }
844 }
845 }
846
847 /* Type ignores are stored under the ENDMARKER in file_input. */
848 ch = CHILD(n, NCH(n) - 1);
849 REQ(ch, ENDMARKER);
850 num = NCH(ch);
851 type_ignores = _Py_asdl_seq_new(num, arena);
852 if (!type_ignores)
853 goto out;
854
855 for (i = 0; i < num; i++) {
856 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
857 if (!type_comment)
858 goto out;
859 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
860 if (!ti)
861 goto out;
862 asdl_seq_SET(type_ignores, i, ti);
863 }
864
865 res = Module(stmts, type_ignores, arena);
866 break;
867 case eval_input: {
868 expr_ty testlist_ast;
869
870 /* XXX Why not comp_for here? */
871 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
872 if (!testlist_ast)
873 goto out;
874 res = Expression(testlist_ast, arena);
875 break;
876 }
877 case single_input:
878 if (TYPE(CHILD(n, 0)) == NEWLINE) {
879 stmts = _Py_asdl_seq_new(1, arena);
880 if (!stmts)
881 goto out;
882 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
883 n->n_end_lineno, n->n_end_col_offset,
884 arena));
885 if (!asdl_seq_GET(stmts, 0))
886 goto out;
887 res = Interactive(stmts, arena);
888 }
889 else {
890 n = CHILD(n, 0);
891 num = num_stmts(n);
892 stmts = _Py_asdl_seq_new(num, arena);
893 if (!stmts)
894 goto out;
895 if (num == 1) {
896 s = ast_for_stmt(&c, n);
897 if (!s)
898 goto out;
899 asdl_seq_SET(stmts, 0, s);
900 }
901 else {
902 /* Only a simple_stmt can contain multiple statements. */
903 REQ(n, simple_stmt);
904 for (i = 0; i < NCH(n); i += 2) {
905 if (TYPE(CHILD(n, i)) == NEWLINE)
906 break;
907 s = ast_for_stmt(&c, CHILD(n, i));
908 if (!s)
909 goto out;
910 asdl_seq_SET(stmts, i / 2, s);
911 }
912 }
913
914 res = Interactive(stmts, arena);
915 }
916 break;
917 case func_type_input:
918 n = CHILD(n, 0);
919 REQ(n, func_type);
920
921 if (TYPE(CHILD(n, 1)) == typelist) {
922 ch = CHILD(n, 1);
923 /* this is overly permissive -- we don't pay any attention to
924 * stars on the args -- just parse them into an ordered list */
925 num = 0;
926 for (i = 0; i < NCH(ch); i++) {
927 if (TYPE(CHILD(ch, i)) == test) {
928 num++;
929 }
930 }
931
932 argtypes = _Py_asdl_seq_new(num, arena);
933 if (!argtypes)
934 goto out;
935
936 j = 0;
937 for (i = 0; i < NCH(ch); i++) {
938 if (TYPE(CHILD(ch, i)) == test) {
939 arg = ast_for_expr(&c, CHILD(ch, i));
940 if (!arg)
941 goto out;
942 asdl_seq_SET(argtypes, j++, arg);
943 }
944 }
945 }
946 else {
947 argtypes = _Py_asdl_seq_new(0, arena);
948 if (!argtypes)
949 goto out;
950 }
951
952 ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
953 if (!ret)
954 goto out;
955 res = FunctionType(argtypes, ret, arena);
956 break;
957 default:
958 PyErr_Format(PyExc_SystemError,
959 "invalid node %d for PyAST_FromNode", TYPE(n));
960 goto out;
961 }
962 out:
963 if (c.c_normalize) {
964 Py_DECREF(c.c_normalize);
965 }
966 return res;
967 }
968
969 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)970 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
971 PyArena *arena)
972 {
973 mod_ty mod;
974 PyObject *filename;
975 filename = PyUnicode_DecodeFSDefault(filename_str);
976 if (filename == NULL)
977 return NULL;
978 mod = PyAST_FromNodeObject(n, flags, filename, arena);
979 Py_DECREF(filename);
980 return mod;
981
982 }
983
984 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
985 */
986
987 static operator_ty
get_operator(struct compiling * c,const node * n)988 get_operator(struct compiling *c, const node *n)
989 {
990 switch (TYPE(n)) {
991 case VBAR:
992 return BitOr;
993 case CIRCUMFLEX:
994 return BitXor;
995 case AMPER:
996 return BitAnd;
997 case LEFTSHIFT:
998 return LShift;
999 case RIGHTSHIFT:
1000 return RShift;
1001 case PLUS:
1002 return Add;
1003 case MINUS:
1004 return Sub;
1005 case STAR:
1006 return Mult;
1007 case AT:
1008 if (c->c_feature_version < 5) {
1009 ast_error(c, n,
1010 "The '@' operator is only supported in Python 3.5 and greater");
1011 return (operator_ty)0;
1012 }
1013 return MatMult;
1014 case SLASH:
1015 return Div;
1016 case DOUBLESLASH:
1017 return FloorDiv;
1018 case PERCENT:
1019 return Mod;
1020 default:
1021 return (operator_ty)0;
1022 }
1023 }
1024
/* Names that may never be assigned to; NULL-terminated.  forbidden_name()
   depends on the order: the first three entries are the constants it skips
   when full checks are off. */
static const char * const FORBIDDEN[] = {
    "None", "True", "False",    /* skipped unless full checks requested */
    "__debug__",                /* always checked */
    NULL,
};
1032
1033 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)1034 forbidden_name(struct compiling *c, identifier name, const node *n,
1035 int full_checks)
1036 {
1037 assert(PyUnicode_Check(name));
1038 const char * const *p = FORBIDDEN;
1039 if (!full_checks) {
1040 /* In most cases, the parser will protect True, False, and None
1041 from being assign to. */
1042 p += 3;
1043 }
1044 for (; *p; p++) {
1045 if (_PyUnicode_EqualToASCIIString(name, *p)) {
1046 ast_error(c, n, "cannot assign to %U", name);
1047 return 1;
1048 }
1049 }
1050 return 0;
1051 }
1052
1053 static expr_ty
copy_location(expr_ty e,const node * n,const node * end)1054 copy_location(expr_ty e, const node *n, const node *end)
1055 {
1056 if (e) {
1057 e->lineno = LINENO(n);
1058 e->col_offset = n->n_col_offset;
1059 e->end_lineno = end->n_end_lineno;
1060 e->end_col_offset = end->n_end_col_offset;
1061 }
1062 return e;
1063 }
1064
1065 static const char *
get_expr_name(expr_ty e)1066 get_expr_name(expr_ty e)
1067 {
1068 switch (e->kind) {
1069 case Attribute_kind:
1070 return "attribute";
1071 case Subscript_kind:
1072 return "subscript";
1073 case Starred_kind:
1074 return "starred";
1075 case Name_kind:
1076 return "name";
1077 case List_kind:
1078 return "list";
1079 case Tuple_kind:
1080 return "tuple";
1081 case Lambda_kind:
1082 return "lambda";
1083 case Call_kind:
1084 return "function call";
1085 case BoolOp_kind:
1086 case BinOp_kind:
1087 case UnaryOp_kind:
1088 return "operator";
1089 case GeneratorExp_kind:
1090 return "generator expression";
1091 case Yield_kind:
1092 case YieldFrom_kind:
1093 return "yield expression";
1094 case Await_kind:
1095 return "await expression";
1096 case ListComp_kind:
1097 return "list comprehension";
1098 case SetComp_kind:
1099 return "set comprehension";
1100 case DictComp_kind:
1101 return "dict comprehension";
1102 case Dict_kind:
1103 return "dict display";
1104 case Set_kind:
1105 return "set display";
1106 case JoinedStr_kind:
1107 case FormattedValue_kind:
1108 return "f-string expression";
1109 case Constant_kind: {
1110 PyObject *value = e->v.Constant.value;
1111 if (value == Py_None) {
1112 return "None";
1113 }
1114 if (value == Py_False) {
1115 return "False";
1116 }
1117 if (value == Py_True) {
1118 return "True";
1119 }
1120 if (value == Py_Ellipsis) {
1121 return "Ellipsis";
1122 }
1123 return "literal";
1124 }
1125 case Compare_kind:
1126 return "comparison";
1127 case IfExp_kind:
1128 return "conditional expression";
1129 case NamedExpr_kind:
1130 return "named expression";
1131 default:
1132 PyErr_Format(PyExc_SystemError,
1133 "unexpected expression in assignment %d (line %d)",
1134 e->kind, e->lineno);
1135 return NULL;
1136 }
1137 }
1138
1139 /* Set the context ctx for expr_ty e, recursively traversing e.
1140
1141 Only sets context for expr kinds that "can appear in assignment context"
1142 (according to ../Parser/Python.asdl). For other expr kinds, it sets
1143 an appropriate syntax error and returns false.
1144 */
1145
1146 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)1147 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
1148 {
1149 asdl_seq *s = NULL;
1150
1151 /* The ast defines augmented store and load contexts, but the
1152 implementation here doesn't actually use them. The code may be
1153 a little more complex than necessary as a result. It also means
1154 that expressions in an augmented assignment have a Store context.
1155 Consider restructuring so that augmented assignment uses
1156 set_context(), too.
1157 */
1158 assert(ctx != AugStore && ctx != AugLoad);
1159
1160 switch (e->kind) {
1161 case Attribute_kind:
1162 e->v.Attribute.ctx = ctx;
1163 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1164 return 0;
1165 break;
1166 case Subscript_kind:
1167 e->v.Subscript.ctx = ctx;
1168 break;
1169 case Starred_kind:
1170 e->v.Starred.ctx = ctx;
1171 if (!set_context(c, e->v.Starred.value, ctx, n))
1172 return 0;
1173 break;
1174 case Name_kind:
1175 if (ctx == Store) {
1176 if (forbidden_name(c, e->v.Name.id, n, 0))
1177 return 0; /* forbidden_name() calls ast_error() */
1178 }
1179 e->v.Name.ctx = ctx;
1180 break;
1181 case List_kind:
1182 e->v.List.ctx = ctx;
1183 s = e->v.List.elts;
1184 break;
1185 case Tuple_kind:
1186 e->v.Tuple.ctx = ctx;
1187 s = e->v.Tuple.elts;
1188 break;
1189 default: {
1190 const char *expr_name = get_expr_name(e);
1191 if (expr_name != NULL) {
1192 ast_error(c, n, "cannot %s %s",
1193 ctx == Store ? "assign to" : "delete",
1194 expr_name);
1195 }
1196 return 0;
1197 }
1198 }
1199
1200 /* If the LHS is a list or tuple, we need to set the assignment
1201 context for all the contained elements.
1202 */
1203 if (s) {
1204 Py_ssize_t i;
1205
1206 for (i = 0; i < asdl_seq_LEN(s); i++) {
1207 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1208 return 0;
1209 }
1210 }
1211 return 1;
1212 }
1213
1214 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1215 ast_for_augassign(struct compiling *c, const node *n)
1216 {
1217 REQ(n, augassign);
1218 n = CHILD(n, 0);
1219 switch (STR(n)[0]) {
1220 case '+':
1221 return Add;
1222 case '-':
1223 return Sub;
1224 case '/':
1225 if (STR(n)[1] == '/')
1226 return FloorDiv;
1227 else
1228 return Div;
1229 case '%':
1230 return Mod;
1231 case '<':
1232 return LShift;
1233 case '>':
1234 return RShift;
1235 case '&':
1236 return BitAnd;
1237 case '^':
1238 return BitXor;
1239 case '|':
1240 return BitOr;
1241 case '*':
1242 if (STR(n)[1] == '*')
1243 return Pow;
1244 else
1245 return Mult;
1246 case '@':
1247 if (c->c_feature_version < 5) {
1248 ast_error(c, n,
1249 "The '@' operator is only supported in Python 3.5 and greater");
1250 return (operator_ty)0;
1251 }
1252 return MatMult;
1253 default:
1254 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1255 return (operator_ty)0;
1256 }
1257 }
1258
1259 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1260 ast_for_comp_op(struct compiling *c, const node *n)
1261 {
1262 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1263 |'is' 'not'
1264 */
1265 REQ(n, comp_op);
1266 if (NCH(n) == 1) {
1267 n = CHILD(n, 0);
1268 switch (TYPE(n)) {
1269 case LESS:
1270 return Lt;
1271 case GREATER:
1272 return Gt;
1273 case EQEQUAL: /* == */
1274 return Eq;
1275 case LESSEQUAL:
1276 return LtE;
1277 case GREATEREQUAL:
1278 return GtE;
1279 case NOTEQUAL:
1280 return NotEq;
1281 case NAME:
1282 if (strcmp(STR(n), "in") == 0)
1283 return In;
1284 if (strcmp(STR(n), "is") == 0)
1285 return Is;
1286 /* fall through */
1287 default:
1288 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1289 STR(n));
1290 return (cmpop_ty)0;
1291 }
1292 }
1293 else if (NCH(n) == 2) {
1294 /* handle "not in" and "is not" */
1295 switch (TYPE(CHILD(n, 0))) {
1296 case NAME:
1297 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1298 return NotIn;
1299 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1300 return IsNot;
1301 /* fall through */
1302 default:
1303 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1304 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1305 return (cmpop_ty)0;
1306 }
1307 }
1308 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1309 NCH(n));
1310 return (cmpop_ty)0;
1311 }
1312
1313 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1314 seq_for_testlist(struct compiling *c, const node *n)
1315 {
1316 /* testlist: test (',' test)* [',']
1317 testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1318 */
1319 asdl_seq *seq;
1320 expr_ty expression;
1321 int i;
1322 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1323
1324 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1325 if (!seq)
1326 return NULL;
1327
1328 for (i = 0; i < NCH(n); i += 2) {
1329 const node *ch = CHILD(n, i);
1330 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
1331
1332 expression = ast_for_expr(c, ch);
1333 if (!expression)
1334 return NULL;
1335
1336 assert(i / 2 < seq->size);
1337 asdl_seq_SET(seq, i / 2, expression);
1338 }
1339 return seq;
1340 }
1341
1342 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1343 ast_for_arg(struct compiling *c, const node *n)
1344 {
1345 identifier name;
1346 expr_ty annotation = NULL;
1347 node *ch;
1348 arg_ty ret;
1349
1350 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1351 ch = CHILD(n, 0);
1352 name = NEW_IDENTIFIER(ch);
1353 if (!name)
1354 return NULL;
1355 if (forbidden_name(c, name, ch, 0))
1356 return NULL;
1357
1358 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1359 annotation = ast_for_expr(c, CHILD(n, 2));
1360 if (!annotation)
1361 return NULL;
1362 }
1363
1364 ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
1365 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
1366 if (!ret)
1367 return NULL;
1368 return ret;
1369 }
1370
1371 /* returns -1 if failed to handle keyword only arguments
1372 returns new position to keep processing if successful
1373 (',' tfpdef ['=' test])*
1374 ^^^
1375 start pointing here
1376 */
1377 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1378 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1379 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1380 {
1381 PyObject *argname;
1382 node *ch;
1383 expr_ty expression, annotation;
1384 arg_ty arg = NULL;
1385 int i = start;
1386 int j = 0; /* index for kwdefaults and kwonlyargs */
1387
1388 if (kwonlyargs == NULL) {
1389 ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1390 return -1;
1391 }
1392 assert(kwdefaults != NULL);
1393 while (i < NCH(n)) {
1394 ch = CHILD(n, i);
1395 switch (TYPE(ch)) {
1396 case vfpdef:
1397 case tfpdef:
1398 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1399 expression = ast_for_expr(c, CHILD(n, i + 2));
1400 if (!expression)
1401 goto error;
1402 asdl_seq_SET(kwdefaults, j, expression);
1403 i += 2; /* '=' and test */
1404 }
1405 else { /* setting NULL if no default value exists */
1406 asdl_seq_SET(kwdefaults, j, NULL);
1407 }
1408 if (NCH(ch) == 3) {
1409 /* ch is NAME ':' test */
1410 annotation = ast_for_expr(c, CHILD(ch, 2));
1411 if (!annotation)
1412 goto error;
1413 }
1414 else {
1415 annotation = NULL;
1416 }
1417 ch = CHILD(ch, 0);
1418 argname = NEW_IDENTIFIER(ch);
1419 if (!argname)
1420 goto error;
1421 if (forbidden_name(c, argname, ch, 0))
1422 goto error;
1423 arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
1424 ch->n_end_lineno, ch->n_end_col_offset,
1425 c->c_arena);
1426 if (!arg)
1427 goto error;
1428 asdl_seq_SET(kwonlyargs, j++, arg);
1429 i += 1; /* the name */
1430 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1431 i += 1; /* the comma, if present */
1432 break;
1433 case TYPE_COMMENT:
1434 /* arg will be equal to the last argument processed */
1435 arg->type_comment = NEW_TYPE_COMMENT(ch);
1436 if (!arg->type_comment)
1437 goto error;
1438 i += 1;
1439 break;
1440 case DOUBLESTAR:
1441 return i;
1442 default:
1443 ast_error(c, ch, "unexpected node");
1444 goto error;
1445 }
1446 }
1447 return i;
1448 error:
1449 return -1;
1450 }
1451
1452 /* Create AST for argument list. */
1453
1454 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1455 ast_for_arguments(struct compiling *c, const node *n)
1456 {
1457 /* This function handles both typedargslist (function definition)
1458 and varargslist (lambda definition).
1459
1460 parameters: '(' [typedargslist] ')'
1461
1462 The following definition for typedarglist is equivalent to this set of rules:
1463
1464 arguments = argument (',' [TYPE_COMMENT] argument)*
1465 argument = tfpdef ['=' test]
1466 kwargs = '**' tfpdef [','] [TYPE_COMMENT]
1467 args = '*' [tfpdef]
1468 kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
1469 [TYPE_COMMENT] [kwargs]])
1470 args_kwonly_kwargs = args kwonly_kwargs | kwargs
1471 poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
1472 [TYPE_COMMENT] [args_kwonly_kwargs]])
1473 typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1474 typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
1475 typedargslist_no_posonly]])|(typedargslist_no_posonly)"
1476
1477 typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1478 ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
1479 [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1480 [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1481 [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1482 [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1483 (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1484 '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (','
1485 [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
1486 [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
1487 [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
1488 [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
1489 (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
1490 '**' tfpdef [','] [TYPE_COMMENT]))
1491
1492 tfpdef: NAME [':' test]
1493
1494 The following definition for varargslist is equivalent to this set of rules:
1495
1496 arguments = argument (',' argument )*
1497 argument = vfpdef ['=' test]
1498 kwargs = '**' vfpdef [',']
1499 args = '*' [vfpdef]
1500 kwonly_kwargs = (',' argument )* [',' [kwargs]]
1501 args_kwonly_kwargs = args kwonly_kwargs | kwargs
1502 poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
1503 vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
1504 varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
1505 (vararglist_no_posonly)
1506
1507 varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
1508 test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
1509 ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
1510 [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
1511 ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1512 | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
1513 [',']]] | '**' vfpdef [','])
1514
1515 vfpdef: NAME
1516
1517 */
1518 int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
1519 int nposdefaults = 0, found_default = 0;
1520 asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1521 arg_ty vararg = NULL, kwarg = NULL;
1522 arg_ty arg = NULL;
1523 node *ch;
1524
1525 if (TYPE(n) == parameters) {
1526 if (NCH(n) == 2) /* () as argument list */
1527 return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1528 n = CHILD(n, 1);
1529 }
1530 assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1531
1532 /* First count the number of positional args & defaults. The
1533 variable i is the loop index for this for loop and the next.
1534 The next loop picks up where the first leaves off.
1535 */
1536 for (i = 0; i < NCH(n); i++) {
1537 ch = CHILD(n, i);
1538 if (TYPE(ch) == STAR) {
1539 /* skip star */
1540 i++;
1541 if (i < NCH(n) && /* skip argument following star */
1542 (TYPE(CHILD(n, i)) == tfpdef ||
1543 TYPE(CHILD(n, i)) == vfpdef)) {
1544 i++;
1545 }
1546 break;
1547 }
1548 if (TYPE(ch) == DOUBLESTAR) break;
1549 if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1550 if (TYPE(ch) == EQUAL) nposdefaults++;
1551 if (TYPE(ch) == SLASH ) {
1552 nposonlyargs = nposargs;
1553 nposargs = 0;
1554 }
1555 }
1556 /* count the number of keyword only args &
1557 defaults for keyword only args */
1558 for ( ; i < NCH(n); ++i) {
1559 ch = CHILD(n, i);
1560 if (TYPE(ch) == DOUBLESTAR) break;
1561 if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1562 }
1563 posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
1564 if (!posonlyargs && nposonlyargs) {
1565 return NULL;
1566 }
1567 posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1568 if (!posargs && nposargs)
1569 return NULL;
1570 kwonlyargs = (nkwonlyargs ?
1571 _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1572 if (!kwonlyargs && nkwonlyargs)
1573 return NULL;
1574 posdefaults = (nposdefaults ?
1575 _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1576 if (!posdefaults && nposdefaults)
1577 return NULL;
1578 /* The length of kwonlyargs and kwdefaults are same
1579 since we set NULL as default for keyword only argument w/o default
1580 - we have sequence data structure, but no dictionary */
1581 kwdefaults = (nkwonlyargs ?
1582 _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1583 if (!kwdefaults && nkwonlyargs)
1584 return NULL;
1585
1586 /* tfpdef: NAME [':' test]
1587 vfpdef: NAME
1588 */
1589 i = 0;
1590 j = 0; /* index for defaults */
1591 k = 0; /* index for args */
1592 l = 0; /* index for posonlyargs */
1593 while (i < NCH(n)) {
1594 ch = CHILD(n, i);
1595 switch (TYPE(ch)) {
1596 case tfpdef:
1597 case vfpdef:
1598 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1599 anything other than EQUAL or a comma? */
1600 /* XXX Should NCH(n) check be made a separate check? */
1601 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1602 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1603 if (!expression)
1604 return NULL;
1605 assert(posdefaults != NULL);
1606 asdl_seq_SET(posdefaults, j++, expression);
1607 i += 2;
1608 found_default = 1;
1609 }
1610 else if (found_default) {
1611 ast_error(c, n,
1612 "non-default argument follows default argument");
1613 return NULL;
1614 }
1615 arg = ast_for_arg(c, ch);
1616 if (!arg)
1617 return NULL;
1618 if (l < nposonlyargs) {
1619 asdl_seq_SET(posonlyargs, l++, arg);
1620 } else {
1621 asdl_seq_SET(posargs, k++, arg);
1622 }
1623 i += 1; /* the name */
1624 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1625 i += 1; /* the comma, if present */
1626 break;
1627 case SLASH:
1628 /* Advance the slash and the comma. If there are more names
1629 * after the slash there will be a comma so we are advancing
1630 * the correct number of nodes. If the slash is the last item,
1631 * we will be advancing an extra token but then * i > NCH(n)
1632 * and the enclosing while will finish correctly. */
1633 i += 2;
1634 break;
1635 case STAR:
1636 if (i+1 >= NCH(n) ||
1637 (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
1638 || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
1639 ast_error(c, CHILD(n, i),
1640 "named arguments must follow bare *");
1641 return NULL;
1642 }
1643 ch = CHILD(n, i+1); /* tfpdef or COMMA */
1644 if (TYPE(ch) == COMMA) {
1645 int res = 0;
1646 i += 2; /* now follows keyword only arguments */
1647
1648 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1649 ast_error(c, CHILD(n, i),
1650 "bare * has associated type comment");
1651 return NULL;
1652 }
1653
1654 res = handle_keywordonly_args(c, n, i,
1655 kwonlyargs, kwdefaults);
1656 if (res == -1) return NULL;
1657 i = res; /* res has new position to process */
1658 }
1659 else {
1660 vararg = ast_for_arg(c, ch);
1661 if (!vararg)
1662 return NULL;
1663
1664 i += 2; /* the star and the name */
1665 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1666 i += 1; /* the comma, if present */
1667
1668 if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
1669 vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
1670 if (!vararg->type_comment)
1671 return NULL;
1672 i += 1;
1673 }
1674
1675 if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1676 || TYPE(CHILD(n, i)) == vfpdef)) {
1677 int res = 0;
1678 res = handle_keywordonly_args(c, n, i,
1679 kwonlyargs, kwdefaults);
1680 if (res == -1) return NULL;
1681 i = res; /* res has new position to process */
1682 }
1683 }
1684 break;
1685 case DOUBLESTAR:
1686 ch = CHILD(n, i+1); /* tfpdef */
1687 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1688 kwarg = ast_for_arg(c, ch);
1689 if (!kwarg)
1690 return NULL;
1691 i += 2; /* the double star and the name */
1692 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
1693 i += 1; /* the comma, if present */
1694 break;
1695 case TYPE_COMMENT:
1696 assert(i);
1697
1698 if (kwarg)
1699 arg = kwarg;
1700
1701 /* arg will be equal to the last argument processed */
1702 arg->type_comment = NEW_TYPE_COMMENT(ch);
1703 if (!arg->type_comment)
1704 return NULL;
1705 i += 1;
1706 break;
1707 default:
1708 PyErr_Format(PyExc_SystemError,
1709 "unexpected node in varargslist: %d @ %d",
1710 TYPE(ch), i);
1711 return NULL;
1712 }
1713 }
1714 return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1715 }
1716
1717 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1718 ast_for_dotted_name(struct compiling *c, const node *n)
1719 {
1720 expr_ty e;
1721 identifier id;
1722 int lineno, col_offset;
1723 int i;
1724 node *ch;
1725
1726 REQ(n, dotted_name);
1727
1728 lineno = LINENO(n);
1729 col_offset = n->n_col_offset;
1730
1731 ch = CHILD(n, 0);
1732 id = NEW_IDENTIFIER(ch);
1733 if (!id)
1734 return NULL;
1735 e = Name(id, Load, lineno, col_offset,
1736 ch->n_end_lineno, ch->n_end_col_offset, c->c_arena);
1737 if (!e)
1738 return NULL;
1739
1740 for (i = 2; i < NCH(n); i+=2) {
1741 const node *child = CHILD(n, i);
1742 id = NEW_IDENTIFIER(child);
1743 if (!id)
1744 return NULL;
1745 e = Attribute(e, id, Load, lineno, col_offset,
1746 child->n_end_lineno, child->n_end_col_offset, c->c_arena);
1747 if (!e)
1748 return NULL;
1749 }
1750
1751 return e;
1752 }
1753
1754 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1755 ast_for_decorator(struct compiling *c, const node *n)
1756 {
1757 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1758 expr_ty d = NULL;
1759 expr_ty name_expr;
1760
1761 REQ(n, decorator);
1762 REQ(CHILD(n, 0), AT);
1763 REQ(RCHILD(n, -1), NEWLINE);
1764
1765 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1766 if (!name_expr)
1767 return NULL;
1768
1769 if (NCH(n) == 3) { /* No arguments */
1770 d = name_expr;
1771 name_expr = NULL;
1772 }
1773 else if (NCH(n) == 5) { /* Call with no arguments */
1774 d = Call(name_expr, NULL, NULL,
1775 name_expr->lineno, name_expr->col_offset,
1776 CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset,
1777 c->c_arena);
1778 if (!d)
1779 return NULL;
1780 name_expr = NULL;
1781 }
1782 else {
1783 d = ast_for_call(c, CHILD(n, 3), name_expr,
1784 CHILD(n, 1), CHILD(n, 2), CHILD(n, 4));
1785 if (!d)
1786 return NULL;
1787 name_expr = NULL;
1788 }
1789
1790 return d;
1791 }
1792
1793 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1794 ast_for_decorators(struct compiling *c, const node *n)
1795 {
1796 asdl_seq* decorator_seq;
1797 expr_ty d;
1798 int i;
1799
1800 REQ(n, decorators);
1801 decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1802 if (!decorator_seq)
1803 return NULL;
1804
1805 for (i = 0; i < NCH(n); i++) {
1806 d = ast_for_decorator(c, CHILD(n, i));
1807 if (!d)
1808 return NULL;
1809 asdl_seq_SET(decorator_seq, i, d);
1810 }
1811 return decorator_seq;
1812 }
1813
1814 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1815 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1816 asdl_seq *decorator_seq, bool is_async)
1817 {
1818 /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
1819 const node * const n = is_async ? CHILD(n0, 1) : n0;
1820 identifier name;
1821 arguments_ty args;
1822 asdl_seq *body;
1823 expr_ty returns = NULL;
1824 int name_i = 1;
1825 int end_lineno, end_col_offset;
1826 node *tc;
1827 string type_comment = NULL;
1828
1829 if (is_async && c->c_feature_version < 5) {
1830 ast_error(c, n,
1831 "Async functions are only supported in Python 3.5 and greater");
1832 return NULL;
1833 }
1834
1835 REQ(n, funcdef);
1836
1837 name = NEW_IDENTIFIER(CHILD(n, name_i));
1838 if (!name)
1839 return NULL;
1840 if (forbidden_name(c, name, CHILD(n, name_i), 0))
1841 return NULL;
1842 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1843 if (!args)
1844 return NULL;
1845 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1846 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1847 if (!returns)
1848 return NULL;
1849 name_i += 2;
1850 }
1851 if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1852 type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1853 if (!type_comment)
1854 return NULL;
1855 name_i += 1;
1856 }
1857 body = ast_for_suite(c, CHILD(n, name_i + 3));
1858 if (!body)
1859 return NULL;
1860 get_last_end_pos(body, &end_lineno, &end_col_offset);
1861
1862 if (NCH(CHILD(n, name_i + 3)) > 1) {
1863 /* Check if the suite has a type comment in it. */
1864 tc = CHILD(CHILD(n, name_i + 3), 1);
1865
1866 if (TYPE(tc) == TYPE_COMMENT) {
1867 if (type_comment != NULL) {
1868 ast_error(c, n, "Cannot have two type comments on def");
1869 return NULL;
1870 }
1871 type_comment = NEW_TYPE_COMMENT(tc);
1872 if (!type_comment)
1873 return NULL;
1874 }
1875 }
1876
1877 if (is_async)
1878 return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
1879 LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1880 else
1881 return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
1882 LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
1883 }
1884
1885 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1886 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1887 {
1888 /* async_funcdef: ASYNC funcdef */
1889 REQ(n, async_funcdef);
1890 REQ(CHILD(n, 0), ASYNC);
1891 REQ(CHILD(n, 1), funcdef);
1892
1893 return ast_for_funcdef_impl(c, n, decorator_seq,
1894 true /* is_async */);
1895 }
1896
1897 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1898 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1899 {
1900 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1901 return ast_for_funcdef_impl(c, n, decorator_seq,
1902 false /* is_async */);
1903 }
1904
1905
1906 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1907 ast_for_async_stmt(struct compiling *c, const node *n)
1908 {
1909 /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1910 REQ(n, async_stmt);
1911 REQ(CHILD(n, 0), ASYNC);
1912
1913 switch (TYPE(CHILD(n, 1))) {
1914 case funcdef:
1915 return ast_for_funcdef_impl(c, n, NULL,
1916 true /* is_async */);
1917 case with_stmt:
1918 return ast_for_with_stmt(c, n,
1919 true /* is_async */);
1920
1921 case for_stmt:
1922 return ast_for_for_stmt(c, n,
1923 true /* is_async */);
1924
1925 default:
1926 PyErr_Format(PyExc_SystemError,
1927 "invalid async stament: %s",
1928 STR(CHILD(n, 1)));
1929 return NULL;
1930 }
1931 }
1932
1933 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1934 ast_for_decorated(struct compiling *c, const node *n)
1935 {
1936 /* decorated: decorators (classdef | funcdef | async_funcdef) */
1937 stmt_ty thing = NULL;
1938 asdl_seq *decorator_seq = NULL;
1939
1940 REQ(n, decorated);
1941
1942 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1943 if (!decorator_seq)
1944 return NULL;
1945
1946 assert(TYPE(CHILD(n, 1)) == funcdef ||
1947 TYPE(CHILD(n, 1)) == async_funcdef ||
1948 TYPE(CHILD(n, 1)) == classdef);
1949
1950 if (TYPE(CHILD(n, 1)) == funcdef) {
1951 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1952 } else if (TYPE(CHILD(n, 1)) == classdef) {
1953 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1954 } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1955 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1956 }
1957 return thing;
1958 }
1959
1960 static expr_ty
ast_for_namedexpr(struct compiling * c,const node * n)1961 ast_for_namedexpr(struct compiling *c, const node *n)
1962 {
1963 /* namedexpr_test: test [':=' test]
1964 argument: ( test [comp_for] |
1965 test ':=' test |
1966 test '=' test |
1967 '**' test |
1968 '*' test )
1969 */
1970 expr_ty target, value;
1971
1972 target = ast_for_expr(c, CHILD(n, 0));
1973 if (!target)
1974 return NULL;
1975
1976 value = ast_for_expr(c, CHILD(n, 2));
1977 if (!value)
1978 return NULL;
1979
1980 if (target->kind != Name_kind) {
1981 const char *expr_name = get_expr_name(target);
1982 if (expr_name != NULL) {
1983 ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
1984 }
1985 return NULL;
1986 }
1987
1988 if (!set_context(c, target, Store, n))
1989 return NULL;
1990
1991 return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
1992 n->n_end_col_offset, c->c_arena);
1993 }
1994
1995 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1996 ast_for_lambdef(struct compiling *c, const node *n)
1997 {
1998 /* lambdef: 'lambda' [varargslist] ':' test
1999 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
2000 arguments_ty args;
2001 expr_ty expression;
2002
2003 if (NCH(n) == 3) {
2004 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
2005 if (!args)
2006 return NULL;
2007 expression = ast_for_expr(c, CHILD(n, 2));
2008 if (!expression)
2009 return NULL;
2010 }
2011 else {
2012 args = ast_for_arguments(c, CHILD(n, 1));
2013 if (!args)
2014 return NULL;
2015 expression = ast_for_expr(c, CHILD(n, 3));
2016 if (!expression)
2017 return NULL;
2018 }
2019
2020 return Lambda(args, expression, LINENO(n), n->n_col_offset,
2021 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2022 }
2023
2024 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)2025 ast_for_ifexpr(struct compiling *c, const node *n)
2026 {
2027 /* test: or_test 'if' or_test 'else' test */
2028 expr_ty expression, body, orelse;
2029
2030 assert(NCH(n) == 5);
2031 body = ast_for_expr(c, CHILD(n, 0));
2032 if (!body)
2033 return NULL;
2034 expression = ast_for_expr(c, CHILD(n, 2));
2035 if (!expression)
2036 return NULL;
2037 orelse = ast_for_expr(c, CHILD(n, 4));
2038 if (!orelse)
2039 return NULL;
2040 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
2041 n->n_end_lineno, n->n_end_col_offset,
2042 c->c_arena);
2043 }
2044
2045 /*
2046 Count the number of 'for' loops in a comprehension.
2047
2048 Helper for ast_for_comprehension().
2049 */
2050
2051 static int
count_comp_fors(struct compiling * c,const node * n)2052 count_comp_fors(struct compiling *c, const node *n)
2053 {
2054 int n_fors = 0;
2055
2056 count_comp_for:
2057 n_fors++;
2058 REQ(n, comp_for);
2059 if (NCH(n) == 2) {
2060 REQ(CHILD(n, 0), ASYNC);
2061 n = CHILD(n, 1);
2062 }
2063 else if (NCH(n) == 1) {
2064 n = CHILD(n, 0);
2065 }
2066 else {
2067 goto error;
2068 }
2069 if (NCH(n) == (5)) {
2070 n = CHILD(n, 4);
2071 }
2072 else {
2073 return n_fors;
2074 }
2075 count_comp_iter:
2076 REQ(n, comp_iter);
2077 n = CHILD(n, 0);
2078 if (TYPE(n) == comp_for)
2079 goto count_comp_for;
2080 else if (TYPE(n) == comp_if) {
2081 if (NCH(n) == 3) {
2082 n = CHILD(n, 2);
2083 goto count_comp_iter;
2084 }
2085 else
2086 return n_fors;
2087 }
2088
2089 error:
2090 /* Should never be reached */
2091 PyErr_SetString(PyExc_SystemError,
2092 "logic error in count_comp_fors");
2093 return -1;
2094 }
2095
2096 /* Count the number of 'if' statements in a comprehension.
2097
2098 Helper for ast_for_comprehension().
2099 */
2100
2101 static int
count_comp_ifs(struct compiling * c,const node * n)2102 count_comp_ifs(struct compiling *c, const node *n)
2103 {
2104 int n_ifs = 0;
2105
2106 while (1) {
2107 REQ(n, comp_iter);
2108 if (TYPE(CHILD(n, 0)) == comp_for)
2109 return n_ifs;
2110 n = CHILD(n, 0);
2111 REQ(n, comp_if);
2112 n_ifs++;
2113 if (NCH(n) == 2)
2114 return n_ifs;
2115 n = CHILD(n, 2);
2116 }
2117 }
2118
/* Build the sequence of comprehension nodes for the chain of 'for' clauses
   starting at n (a comp_for node).

   One comprehension_ty is produced per 'for' clause (as counted by
   count_comp_fors()); each carries its target, its iterable, its 'if'
   conditions (if any) and an is_async flag.  Everything is allocated in
   c->c_arena.

   Returns the sequence, or NULL if a helper fails or an async
   comprehension is used with c_feature_version < 6.
*/
static asdl_seq *
ast_for_comprehension(struct compiling *c, const node *n)
{
    int i, n_fors;
    asdl_seq *comps;

    n_fors = count_comp_fors(c, n);
    if (n_fors == -1)
        return NULL;

    comps = _Py_asdl_seq_new(n_fors, c->c_arena);
    if (!comps)
        return NULL;

    /* n is advanced inside the loop body so that at the top of each
       iteration it points at the comp_for being converted. */
    for (i = 0; i < n_fors; i++) {
        comprehension_ty comp;
        asdl_seq *t;
        expr_ty expression, first;
        node *for_ch;
        node *sync_n;
        int is_async = 0;

        REQ(n, comp_for);

        /* A comp_for with two children is ASYNC followed by sync_comp_for;
           otherwise its only child is the sync_comp_for. */
        if (NCH(n) == 2) {
            is_async = 1;
            REQ(CHILD(n, 0), ASYNC);
            sync_n = CHILD(n, 1);
        }
        else {
            sync_n = CHILD(n, 0);
        }
        REQ(sync_n, sync_comp_for);

        /* Async comprehensions only allowed in Python 3.6 and greater */
        if (is_async && c->c_feature_version < 6) {
            ast_error(c, n,
                      "Async comprehensions are only supported in Python 3.6 and greater");
            return NULL;
        }

        /* Child 1 of sync_comp_for is the target list, child 3 the iterable. */
        for_ch = CHILD(sync_n, 1);
        t = ast_for_exprlist(c, for_ch, Store);
        if (!t)
            return NULL;
        expression = ast_for_expr(c, CHILD(sync_n, 3));
        if (!expression)
            return NULL;

        /* Check the # of children rather than the length of t, since
           (x for x, in ...) has 1 element in t, but still requires a Tuple. */
        first = (expr_ty)asdl_seq_GET(t, 0);
        if (NCH(for_ch) == 1)
            comp = comprehension(first, expression, NULL,
                                 is_async, c->c_arena);
        else
            /* NOTE(review): the Tuple() result is passed to comprehension()
               without a NULL check -- confirm comprehension() rejects a
               NULL target. */
            comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
                                       for_ch->n_end_lineno, for_ch->n_end_col_offset,
                                       c->c_arena),
                                 expression, NULL, is_async, c->c_arena);
        if (!comp)
            return NULL;

        /* A fifth child is the comp_iter holding the 'if' clauses and/or
           the next nested 'for'. */
        if (NCH(sync_n) == 5) {
            int j, n_ifs;
            asdl_seq *ifs;

            n = CHILD(sync_n, 4);
            n_ifs = count_comp_ifs(c, n);
            if (n_ifs == -1)
                return NULL;

            ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
            if (!ifs)
                return NULL;

            for (j = 0; j < n_ifs; j++) {
                REQ(n, comp_iter);
                n = CHILD(n, 0);
                REQ(n, comp_if);

                /* Child 1 of comp_if is the condition expression. */
                expression = ast_for_expr(c, CHILD(n, 1));
                if (!expression)
                    return NULL;
                asdl_seq_SET(ifs, j, expression);
                /* A third child is the comp_iter that continues the chain. */
                if (NCH(n) == 3)
                    n = CHILD(n, 2);
            }
            /* on exit, must guarantee that n is a comp_for */
            if (TYPE(n) == comp_iter)
                n = CHILD(n, 0);
            comp->ifs = ifs;
        }
        asdl_seq_SET(comps, i, comp);
    }
    return comps;
}
2216
2217 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)2218 ast_for_itercomp(struct compiling *c, const node *n, int type)
2219 {
2220 /* testlist_comp: (test|star_expr)
2221 * ( comp_for | (',' (test|star_expr))* [','] ) */
2222 expr_ty elt;
2223 asdl_seq *comps;
2224 node *ch;
2225
2226 assert(NCH(n) > 1);
2227
2228 ch = CHILD(n, 0);
2229 elt = ast_for_expr(c, ch);
2230 if (!elt)
2231 return NULL;
2232 if (elt->kind == Starred_kind) {
2233 ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
2234 return NULL;
2235 }
2236
2237 comps = ast_for_comprehension(c, CHILD(n, 1));
2238 if (!comps)
2239 return NULL;
2240
2241 if (type == COMP_GENEXP)
2242 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
2243 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2244 else if (type == COMP_LISTCOMP)
2245 return ListComp(elt, comps, LINENO(n), n->n_col_offset,
2246 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2247 else if (type == COMP_SETCOMP)
2248 return SetComp(elt, comps, LINENO(n), n->n_col_offset,
2249 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2250 else
2251 /* Should never happen */
2252 return NULL;
2253 }
2254
2255 /* Fills in the key, value pair corresponding to the dict element. In case
2256 * of an unpacking, key is NULL. *i is advanced by the number of ast
2257 * elements. Iff successful, nonzero is returned.
2258 */
2259 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)2260 ast_for_dictelement(struct compiling *c, const node *n, int *i,
2261 expr_ty *key, expr_ty *value)
2262 {
2263 expr_ty expression;
2264 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
2265 assert(NCH(n) - *i >= 2);
2266
2267 expression = ast_for_expr(c, CHILD(n, *i + 1));
2268 if (!expression)
2269 return 0;
2270 *key = NULL;
2271 *value = expression;
2272
2273 *i += 2;
2274 }
2275 else {
2276 assert(NCH(n) - *i >= 3);
2277
2278 expression = ast_for_expr(c, CHILD(n, *i));
2279 if (!expression)
2280 return 0;
2281 *key = expression;
2282
2283 REQ(CHILD(n, *i + 1), COLON);
2284
2285 expression = ast_for_expr(c, CHILD(n, *i + 2));
2286 if (!expression)
2287 return 0;
2288 *value = expression;
2289
2290 *i += 3;
2291 }
2292 return 1;
2293 }
2294
2295 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)2296 ast_for_dictcomp(struct compiling *c, const node *n)
2297 {
2298 expr_ty key, value;
2299 asdl_seq *comps;
2300 int i = 0;
2301
2302 if (!ast_for_dictelement(c, n, &i, &key, &value))
2303 return NULL;
2304 assert(key);
2305 assert(NCH(n) - i >= 1);
2306
2307 comps = ast_for_comprehension(c, CHILD(n, i));
2308 if (!comps)
2309 return NULL;
2310
2311 return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
2312 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2313 }
2314
2315 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2316 ast_for_dictdisplay(struct compiling *c, const node *n)
2317 {
2318 int i;
2319 int j;
2320 int size;
2321 asdl_seq *keys, *values;
2322
2323 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2324 keys = _Py_asdl_seq_new(size, c->c_arena);
2325 if (!keys)
2326 return NULL;
2327
2328 values = _Py_asdl_seq_new(size, c->c_arena);
2329 if (!values)
2330 return NULL;
2331
2332 j = 0;
2333 for (i = 0; i < NCH(n); i++) {
2334 expr_ty key, value;
2335
2336 if (!ast_for_dictelement(c, n, &i, &key, &value))
2337 return NULL;
2338 asdl_seq_SET(keys, j, key);
2339 asdl_seq_SET(values, j, value);
2340
2341 j++;
2342 }
2343 keys->size = j;
2344 values->size = j;
2345 return Dict(keys, values, LINENO(n), n->n_col_offset,
2346 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2347 }
2348
2349 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2350 ast_for_genexp(struct compiling *c, const node *n)
2351 {
2352 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2353 return ast_for_itercomp(c, n, COMP_GENEXP);
2354 }
2355
2356 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2357 ast_for_listcomp(struct compiling *c, const node *n)
2358 {
2359 assert(TYPE(n) == (testlist_comp));
2360 return ast_for_itercomp(c, n, COMP_LISTCOMP);
2361 }
2362
2363 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2364 ast_for_setcomp(struct compiling *c, const node *n)
2365 {
2366 assert(TYPE(n) == (dictorsetmaker));
2367 return ast_for_itercomp(c, n, COMP_SETCOMP);
2368 }
2369
2370 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2371 ast_for_setdisplay(struct compiling *c, const node *n)
2372 {
2373 int i;
2374 int size;
2375 asdl_seq *elts;
2376
2377 assert(TYPE(n) == (dictorsetmaker));
2378 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2379 elts = _Py_asdl_seq_new(size, c->c_arena);
2380 if (!elts)
2381 return NULL;
2382 for (i = 0; i < NCH(n); i += 2) {
2383 expr_ty expression;
2384 expression = ast_for_expr(c, CHILD(n, i));
2385 if (!expression)
2386 return NULL;
2387 asdl_seq_SET(elts, i / 2, expression);
2388 }
2389 return Set(elts, LINENO(n), n->n_col_offset,
2390 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2391 }
2392
/* Convert an atom node into an expression.

   Dispatches on the type of the atom's first child: names (including the
   None/True/False constants), string literals, numbers, '...', and the
   parenthesized / bracketed / braced forms.  Returns NULL on error; for an
   unrecognized first child a SystemError is set.
*/
static expr_ty
ast_for_atom(struct compiling *c, const node *n)
{
    /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
       | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+
       | '...' | 'None' | 'True' | 'False'
    */
    node *ch = CHILD(n, 0);

    switch (TYPE(ch)) {
    case NAME: {
        PyObject *name;
        const char *s = STR(ch);
        size_t len = strlen(s);
        /* Only strings of length 4 or 5 can be None, True or False, so the
           strcmp() calls are skipped for everything else. */
        if (len >= 4 && len <= 5) {
            if (!strcmp(s, "None"))
                return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            if (!strcmp(s, "True"))
                return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            if (!strcmp(s, "False"))
                return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
                                n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        name = new_identifier(s, c);
        if (!name)
            return NULL;
        /* All names start in Load context, but may later be changed. */
        return Name(name, Load, LINENO(n), n->n_col_offset,
                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    case STRING: {
        expr_ty str = parsestrplus(c, n);
        if (!str) {
            /* If string parsing failed with a UnicodeError or ValueError,
               re-report it through ast_error() with a "(unicode error)" or
               "(value error)" prefix.  Any other exception is left as is. */
            const char *errtype = NULL;
            if (PyErr_ExceptionMatches(PyExc_UnicodeError))
                errtype = "unicode error";
            else if (PyErr_ExceptionMatches(PyExc_ValueError))
                errtype = "value error";
            if (errtype) {
                PyObject *type, *value, *tback, *errstr;
                /* Take ownership of the pending exception; it is released
                   below after its text has been used. */
                PyErr_Fetch(&type, &value, &tback);
                errstr = PyObject_Str(value);
                if (errstr) {
                    ast_error(c, n, "(%s) %U", errtype, errstr);
                    Py_DECREF(errstr);
                }
                else {
                    /* Stringifying the original error failed; drop that
                       secondary error and report a generic message. */
                    PyErr_Clear();
                    ast_error(c, n, "(%s) unknown error", errtype);
                }
                Py_DECREF(type);
                Py_XDECREF(value);
                Py_XDECREF(tback);
            }
            return NULL;
        }
        return str;
    }
    case NUMBER: {
        PyObject *pynum;
        /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
        /* Check for underscores here rather than in parse_number so we can report a line number on error */
        if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
            ast_error(c, ch,
                      "Underscores in numeric literals are only supported in Python 3.6 and greater");
            return NULL;
        }
        pynum = parsenumber(c, STR(ch));
        if (!pynum)
            return NULL;

        /* Register the number with the arena; if that fails the reference
           is dropped here. */
        if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
            Py_DECREF(pynum);
            return NULL;
        }
        return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    }
    case ELLIPSIS: /* Ellipsis */
        return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
    case LPAR: /* some parenthesized expressions */
        ch = CHILD(n, 1);

        /* "()" is the empty tuple. */
        if (TYPE(ch) == RPAR)
            return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);

        if (TYPE(ch) == yield_expr)
            return ast_for_expr(c, ch);

        /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
        /* A single child is a plain parenthesized expression; its location
           is not widened to include the parentheses. */
        if (NCH(ch) == 1) {
            return ast_for_testlist(c, ch);
        }

        /* For a generator expression or a tuple, the location is reset to
           span the whole atom, parentheses included. */
        if (TYPE(CHILD(ch, 1)) == comp_for) {
            return copy_location(ast_for_genexp(c, ch), n, n);
        }
        else {
            return copy_location(ast_for_testlist(c, ch), n, n);
        }
    case LSQB: /* list (or list comprehension) */
        ch = CHILD(n, 1);

        /* "[]" is the empty list. */
        if (TYPE(ch) == RSQB)
            return List(NULL, Load, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);

        REQ(ch, testlist_comp);
        /* One element, or a comma after the first element: a list display.
           Otherwise the second child is a comp_for: a list comprehension. */
        if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
            asdl_seq *elts = seq_for_testlist(c, ch);
            if (!elts)
                return NULL;

            return List(elts, Load, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        else {
            return copy_location(ast_for_listcomp(c, ch), n, n);
        }
    case LBRACE: {
        /* dictorsetmaker: ( ((test ':' test | '**' test)
         *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
         *                   ((test | '*' test)
         *                    (comp_for | (',' (test | '*' test))* [','])) ) */
        expr_ty res;
        ch = CHILD(n, 1);
        if (TYPE(ch) == RBRACE) {
            /* It's an empty dict. */
            return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
                        n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        else {
            /* is_dict is 1 when the first element is a '**' unpacking.
               Such an element spans 2 children instead of 3, so a comp_for
               following it sits at index 3 - is_dict. */
            int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
            if (NCH(ch) == 1 ||
                    (NCH(ch) > 1 &&
                     TYPE(CHILD(ch, 1)) == COMMA)) {
                /* It's a set display. */
                res = ast_for_setdisplay(c, ch);
            }
            else if (NCH(ch) > 1 &&
                    TYPE(CHILD(ch, 1)) == comp_for) {
                /* It's a set comprehension. */
                res = ast_for_setcomp(c, ch);
            }
            else if (NCH(ch) > 3 - is_dict &&
                    TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
                /* It's a dictionary comprehension. */
                if (is_dict) {
                    ast_error(c, n,
                              "dict unpacking cannot be used in dict comprehension");
                    return NULL;
                }
                res = ast_for_dictcomp(c, ch);
            }
            else {
                /* It's a dictionary display. */
                res = ast_for_dictdisplay(c, ch);
            }
            /* Reset the location to span the whole atom, braces included. */
            return copy_location(res, n, n);
        }
    }
    default:
        PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
        return NULL;
    }
}
2563
2564 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2565 ast_for_slice(struct compiling *c, const node *n)
2566 {
2567 node *ch;
2568 expr_ty lower = NULL, upper = NULL, step = NULL;
2569
2570 REQ(n, subscript);
2571
2572 /*
2573 subscript: test | [test] ':' [test] [sliceop]
2574 sliceop: ':' [test]
2575 */
2576 ch = CHILD(n, 0);
2577 if (NCH(n) == 1 && TYPE(ch) == test) {
2578 /* 'step' variable hold no significance in terms of being used over
2579 other vars */
2580 step = ast_for_expr(c, ch);
2581 if (!step)
2582 return NULL;
2583
2584 return Index(step, c->c_arena);
2585 }
2586
2587 if (TYPE(ch) == test) {
2588 lower = ast_for_expr(c, ch);
2589 if (!lower)
2590 return NULL;
2591 }
2592
2593 /* If there's an upper bound it's in the second or third position. */
2594 if (TYPE(ch) == COLON) {
2595 if (NCH(n) > 1) {
2596 node *n2 = CHILD(n, 1);
2597
2598 if (TYPE(n2) == test) {
2599 upper = ast_for_expr(c, n2);
2600 if (!upper)
2601 return NULL;
2602 }
2603 }
2604 } else if (NCH(n) > 2) {
2605 node *n2 = CHILD(n, 2);
2606
2607 if (TYPE(n2) == test) {
2608 upper = ast_for_expr(c, n2);
2609 if (!upper)
2610 return NULL;
2611 }
2612 }
2613
2614 ch = CHILD(n, NCH(n) - 1);
2615 if (TYPE(ch) == sliceop) {
2616 if (NCH(ch) != 1) {
2617 ch = CHILD(ch, 1);
2618 if (TYPE(ch) == test) {
2619 step = ast_for_expr(c, ch);
2620 if (!step)
2621 return NULL;
2622 }
2623 }
2624 }
2625
2626 return Slice(lower, upper, step, c->c_arena);
2627 }
2628
2629 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2630 ast_for_binop(struct compiling *c, const node *n)
2631 {
2632 /* Must account for a sequence of expressions.
2633 How should A op B op C by represented?
2634 BinOp(BinOp(A, op, B), op, C).
2635 */
2636
2637 int i, nops;
2638 expr_ty expr1, expr2, result;
2639 operator_ty newoperator;
2640
2641 expr1 = ast_for_expr(c, CHILD(n, 0));
2642 if (!expr1)
2643 return NULL;
2644
2645 expr2 = ast_for_expr(c, CHILD(n, 2));
2646 if (!expr2)
2647 return NULL;
2648
2649 newoperator = get_operator(c, CHILD(n, 1));
2650 if (!newoperator)
2651 return NULL;
2652
2653 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2654 CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset,
2655 c->c_arena);
2656 if (!result)
2657 return NULL;
2658
2659 nops = (NCH(n) - 1) / 2;
2660 for (i = 1; i < nops; i++) {
2661 expr_ty tmp_result, tmp;
2662 const node* next_oper = CHILD(n, i * 2 + 1);
2663
2664 newoperator = get_operator(c, next_oper);
2665 if (!newoperator)
2666 return NULL;
2667
2668 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2669 if (!tmp)
2670 return NULL;
2671
2672 tmp_result = BinOp(result, newoperator, tmp,
2673 LINENO(n), n->n_col_offset,
2674 CHILD(n, i * 2 + 2)->n_end_lineno,
2675 CHILD(n, i * 2 + 2)->n_end_col_offset,
2676 c->c_arena);
2677 if (!tmp_result)
2678 return NULL;
2679 result = tmp_result;
2680 }
2681 return result;
2682 }
2683
2684 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr,const node * start)2685 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
2686 {
2687 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2688 subscriptlist: subscript (',' subscript)* [',']
2689 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2690 */
2691 const node *n_copy = n;
2692 REQ(n, trailer);
2693 if (TYPE(CHILD(n, 0)) == LPAR) {
2694 if (NCH(n) == 2)
2695 return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
2696 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2697 else
2698 return ast_for_call(c, CHILD(n, 1), left_expr,
2699 start, CHILD(n, 0), CHILD(n, 2));
2700 }
2701 else if (TYPE(CHILD(n, 0)) == DOT) {
2702 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2703 if (!attr_id)
2704 return NULL;
2705 return Attribute(left_expr, attr_id, Load,
2706 LINENO(start), start->n_col_offset,
2707 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2708 }
2709 else {
2710 REQ(CHILD(n, 0), LSQB);
2711 REQ(CHILD(n, 2), RSQB);
2712 n = CHILD(n, 1);
2713 if (NCH(n) == 1) {
2714 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2715 if (!slc)
2716 return NULL;
2717 return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset,
2718 n_copy->n_end_lineno, n_copy->n_end_col_offset,
2719 c->c_arena);
2720 }
2721 else {
2722 /* The grammar is ambiguous here. The ambiguity is resolved
2723 by treating the sequence as a tuple literal if there are
2724 no slice features.
2725 */
2726 Py_ssize_t j;
2727 slice_ty slc;
2728 expr_ty e;
2729 int simple = 1;
2730 asdl_seq *slices, *elts;
2731 slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2732 if (!slices)
2733 return NULL;
2734 for (j = 0; j < NCH(n); j += 2) {
2735 slc = ast_for_slice(c, CHILD(n, j));
2736 if (!slc)
2737 return NULL;
2738 if (slc->kind != Index_kind)
2739 simple = 0;
2740 asdl_seq_SET(slices, j / 2, slc);
2741 }
2742 if (!simple) {
2743 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2744 Load, LINENO(start), start->n_col_offset,
2745 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2746 }
2747 /* extract Index values and put them in a Tuple */
2748 elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2749 if (!elts)
2750 return NULL;
2751 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2752 slc = (slice_ty)asdl_seq_GET(slices, j);
2753 assert(slc->kind == Index_kind && slc->v.Index.value);
2754 asdl_seq_SET(elts, j, slc->v.Index.value);
2755 }
2756 e = Tuple(elts, Load, LINENO(n), n->n_col_offset,
2757 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2758 if (!e)
2759 return NULL;
2760 return Subscript(left_expr, Index(e, c->c_arena),
2761 Load, LINENO(start), start->n_col_offset,
2762 n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena);
2763 }
2764 }
2765 }
2766
2767 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2768 ast_for_factor(struct compiling *c, const node *n)
2769 {
2770 expr_ty expression;
2771
2772 expression = ast_for_expr(c, CHILD(n, 1));
2773 if (!expression)
2774 return NULL;
2775
2776 switch (TYPE(CHILD(n, 0))) {
2777 case PLUS:
2778 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2779 n->n_end_lineno, n->n_end_col_offset,
2780 c->c_arena);
2781 case MINUS:
2782 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2783 n->n_end_lineno, n->n_end_col_offset,
2784 c->c_arena);
2785 case TILDE:
2786 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset,
2787 n->n_end_lineno, n->n_end_col_offset,
2788 c->c_arena);
2789 }
2790 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2791 TYPE(CHILD(n, 0)));
2792 return NULL;
2793 }
2794
2795 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2796 ast_for_atom_expr(struct compiling *c, const node *n)
2797 {
2798 int i, nch, start = 0;
2799 expr_ty e;
2800
2801 REQ(n, atom_expr);
2802 nch = NCH(n);
2803
2804 if (TYPE(CHILD(n, 0)) == AWAIT) {
2805 if (c->c_feature_version < 5) {
2806 ast_error(c, n,
2807 "Await expressions are only supported in Python 3.5 and greater");
2808 return NULL;
2809 }
2810 start = 1;
2811 assert(nch > 1);
2812 }
2813
2814 e = ast_for_atom(c, CHILD(n, start));
2815 if (!e)
2816 return NULL;
2817 if (nch == 1)
2818 return e;
2819 if (start && nch == 2) {
2820 return Await(e, LINENO(n), n->n_col_offset,
2821 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2822 }
2823
2824 for (i = start + 1; i < nch; i++) {
2825 node *ch = CHILD(n, i);
2826 if (TYPE(ch) != trailer)
2827 break;
2828 e = ast_for_trailer(c, ch, e, CHILD(n, start));
2829 if (!e)
2830 return NULL;
2831 }
2832
2833 if (start) {
2834 /* there was an 'await' */
2835 return Await(e, LINENO(n), n->n_col_offset,
2836 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2837 }
2838 else {
2839 return e;
2840 }
2841 }
2842
2843 static expr_ty
ast_for_power(struct compiling * c,const node * n)2844 ast_for_power(struct compiling *c, const node *n)
2845 {
2846 /* power: atom trailer* ('**' factor)*
2847 */
2848 expr_ty e;
2849 REQ(n, power);
2850 e = ast_for_atom_expr(c, CHILD(n, 0));
2851 if (!e)
2852 return NULL;
2853 if (NCH(n) == 1)
2854 return e;
2855 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2856 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2857 if (!f)
2858 return NULL;
2859 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset,
2860 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2861 }
2862 return e;
2863 }
2864
2865 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2866 ast_for_starred(struct compiling *c, const node *n)
2867 {
2868 expr_ty tmp;
2869 REQ(n, star_expr);
2870
2871 tmp = ast_for_expr(c, CHILD(n, 1));
2872 if (!tmp)
2873 return NULL;
2874
2875 /* The Load context is changed later. */
2876 return Starred(tmp, Load, LINENO(n), n->n_col_offset,
2877 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
2878 }
2879
2880
2881 /* Do not name a variable 'expr'! Will cause a compile error.
2882 */
2883
/* Convert any expression-level CST node into an AST expression.

   Dispatches on TYPE(n).  Nodes that merely wrap a single child are
   unwrapped in place: n is replaced by its only child and the switch is
   re-entered via 'goto loop', without recursing.  Returns NULL on error;
   an unrecognized node type sets SystemError.
*/
static expr_ty
ast_for_expr(struct compiling *c, const node *n)
{
    /* handle the full range of simple expressions
       namedexpr_test: test [':=' test]
       test: or_test ['if' or_test 'else' test] | lambdef
       test_nocond: or_test | lambdef_nocond
       or_test: and_test ('or' and_test)*
       and_test: not_test ('and' not_test)*
       not_test: 'not' not_test | comparison
       comparison: expr (comp_op expr)*
       expr: xor_expr ('|' xor_expr)*
       xor_expr: and_expr ('^' and_expr)*
       and_expr: shift_expr ('&' shift_expr)*
       shift_expr: arith_expr (('<<'|'>>') arith_expr)*
       arith_expr: term (('+'|'-') term)*
       term: factor (('*'|'@'|'/'|'%'|'//') factor)*
       factor: ('+'|'-'|'~') factor | power
       power: atom_expr ['**' factor]
       atom_expr: [AWAIT] atom trailer*
       yield_expr: 'yield' [yield_arg]
    */

    asdl_seq *seq;
    int i;

 loop:
    switch (TYPE(n)) {
        case namedexpr_test:
            /* Three children means "target := value"; a single child is
               handled by the test case below. */
            if (NCH(n) == 3)
                return ast_for_namedexpr(c, n);
            /* Fallthrough */
        case test:
        case test_nocond:
            if (TYPE(CHILD(n, 0)) == lambdef ||
                TYPE(CHILD(n, 0)) == lambdef_nocond)
                return ast_for_lambdef(c, CHILD(n, 0));
            else if (NCH(n) > 1)
                return ast_for_ifexpr(c, n);
            /* Fallthrough */
        case or_test:
        case and_test:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            /* Operands are at even positions; the 'and'/'or' keywords sit
               between them. */
            seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
            if (!seq)
                return NULL;
            for (i = 0; i < NCH(n); i += 2) {
                expr_ty e = ast_for_expr(c, CHILD(n, i));
                if (!e)
                    return NULL;
                asdl_seq_SET(seq, i / 2, e);
            }
            /* The first keyword decides which boolean operator is built. */
            if (!strcmp(STR(CHILD(n, 1)), "and"))
                return BoolOp(And, seq, LINENO(n), n->n_col_offset,
                              n->n_end_lineno, n->n_end_col_offset,
                              c->c_arena);
            assert(!strcmp(STR(CHILD(n, 1)), "or"));
            return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
                          n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        case not_test:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                expr_ty expression = ast_for_expr(c, CHILD(n, 1));
                if (!expression)
                    return NULL;

                return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
                               n->n_end_lineno, n->n_end_col_offset,
                               c->c_arena);
            }
        case comparison:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                expr_ty expression;
                asdl_int_seq *ops;
                asdl_seq *cmps;
                /* One operator and one comparator for each
                   (comp_op, expr) pair after the left operand. */
                ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
                if (!ops)
                    return NULL;
                cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
                if (!cmps) {
                    return NULL;
                }
                for (i = 1; i < NCH(n); i += 2) {
                    cmpop_ty newoperator;

                    newoperator = ast_for_comp_op(c, CHILD(n, i));
                    if (!newoperator) {
                        return NULL;
                    }

                    expression = ast_for_expr(c, CHILD(n, i + 1));
                    if (!expression) {
                        return NULL;
                    }

                    asdl_seq_SET(ops, i / 2, newoperator);
                    asdl_seq_SET(cmps, i / 2, expression);
                }
                /* The left operand (child 0) is converted last, after all
                   comparators. */
                expression = ast_for_expr(c, CHILD(n, 0));
                if (!expression) {
                    return NULL;
                }

                return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
                               n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            }

        case star_expr:
            return ast_for_starred(c, n);
        /* The next six cases all handle BinOps.  The main body of code
           is the same in each case, but the switch turned inside out to
           reuse the code for each type of operator.
         */
        case expr:
        case xor_expr:
        case and_expr:
        case shift_expr:
        case arith_expr:
        case term:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_binop(c, n);
        case yield_expr: {
            node *an = NULL;
            node *en = NULL;
            int is_from = 0;
            expr_ty exp = NULL;
            if (NCH(n) > 1)
                an = CHILD(n, 1); /* yield_arg */
            if (an) {
                /* The yielded expression is the last child of yield_arg;
                   two children are treated as the 'yield from' form. */
                en = CHILD(an, NCH(an) - 1);
                if (NCH(an) == 2) {
                    is_from = 1;
                    exp = ast_for_expr(c, en);
                }
                else
                    exp = ast_for_testlist(c, en);
                if (!exp)
                    return NULL;
            }
            if (is_from)
                return YieldFrom(exp, LINENO(n), n->n_col_offset,
                                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
            /* A bare 'yield' reaches here with exp == NULL. */
            return Yield(exp, LINENO(n), n->n_col_offset,
                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
        }
        case factor:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_factor(c, n);
        case power:
            return ast_for_power(c, n);
        default:
            PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
            return NULL;
    }
    /* should never get here unless if error is set */
    return NULL;
}
3057
3058 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,const node * start,const node * maybegenbeg,const node * closepar)3059 ast_for_call(struct compiling *c, const node *n, expr_ty func,
3060 const node *start, const node *maybegenbeg, const node *closepar)
3061 {
3062 /*
3063 arglist: argument (',' argument)* [',']
3064 argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
3065 */
3066
3067 int i, nargs, nkeywords;
3068 int ndoublestars;
3069 asdl_seq *args;
3070 asdl_seq *keywords;
3071
3072 REQ(n, arglist);
3073
3074 nargs = 0;
3075 nkeywords = 0;
3076 for (i = 0; i < NCH(n); i++) {
3077 node *ch = CHILD(n, i);
3078 if (TYPE(ch) == argument) {
3079 if (NCH(ch) == 1)
3080 nargs++;
3081 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3082 nargs++;
3083 if (!maybegenbeg) {
3084 ast_error(c, ch, "invalid syntax");
3085 return NULL;
3086 }
3087 if (NCH(n) > 1) {
3088 ast_error(c, ch, "Generator expression must be parenthesized");
3089 return NULL;
3090 }
3091 }
3092 else if (TYPE(CHILD(ch, 0)) == STAR)
3093 nargs++;
3094 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3095 nargs++;
3096 }
3097 else
3098 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
3099 nkeywords++;
3100 }
3101 }
3102
3103 args = _Py_asdl_seq_new(nargs, c->c_arena);
3104 if (!args)
3105 return NULL;
3106 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
3107 if (!keywords)
3108 return NULL;
3109
3110 nargs = 0; /* positional arguments + iterable argument unpackings */
3111 nkeywords = 0; /* keyword arguments + keyword argument unpackings */
3112 ndoublestars = 0; /* just keyword argument unpackings */
3113 for (i = 0; i < NCH(n); i++) {
3114 node *ch = CHILD(n, i);
3115 if (TYPE(ch) == argument) {
3116 expr_ty e;
3117 node *chch = CHILD(ch, 0);
3118 if (NCH(ch) == 1) {
3119 /* a positional argument */
3120 if (nkeywords) {
3121 if (ndoublestars) {
3122 ast_error(c, chch,
3123 "positional argument follows "
3124 "keyword argument unpacking");
3125 }
3126 else {
3127 ast_error(c, chch,
3128 "positional argument follows "
3129 "keyword argument");
3130 }
3131 return NULL;
3132 }
3133 e = ast_for_expr(c, chch);
3134 if (!e)
3135 return NULL;
3136 asdl_seq_SET(args, nargs++, e);
3137 }
3138 else if (TYPE(chch) == STAR) {
3139 /* an iterable argument unpacking */
3140 expr_ty starred;
3141 if (ndoublestars) {
3142 ast_error(c, chch,
3143 "iterable argument unpacking follows "
3144 "keyword argument unpacking");
3145 return NULL;
3146 }
3147 e = ast_for_expr(c, CHILD(ch, 1));
3148 if (!e)
3149 return NULL;
3150 starred = Starred(e, Load, LINENO(chch),
3151 chch->n_col_offset,
3152 e->end_lineno, e->end_col_offset,
3153 c->c_arena);
3154 if (!starred)
3155 return NULL;
3156 asdl_seq_SET(args, nargs++, starred);
3157
3158 }
3159 else if (TYPE(chch) == DOUBLESTAR) {
3160 /* a keyword argument unpacking */
3161 keyword_ty kw;
3162 i++;
3163 e = ast_for_expr(c, CHILD(ch, 1));
3164 if (!e)
3165 return NULL;
3166 kw = keyword(NULL, e, c->c_arena);
3167 asdl_seq_SET(keywords, nkeywords++, kw);
3168 ndoublestars++;
3169 }
3170 else if (TYPE(CHILD(ch, 1)) == comp_for) {
3171 /* the lone generator expression */
3172 e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
3173 if (!e)
3174 return NULL;
3175 asdl_seq_SET(args, nargs++, e);
3176 }
3177 else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
3178 /* treat colon equal as positional argument */
3179 if (nkeywords) {
3180 if (ndoublestars) {
3181 ast_error(c, chch,
3182 "positional argument follows "
3183 "keyword argument unpacking");
3184 }
3185 else {
3186 ast_error(c, chch,
3187 "positional argument follows "
3188 "keyword argument");
3189 }
3190 return NULL;
3191 }
3192 e = ast_for_namedexpr(c, ch);
3193 if (!e)
3194 return NULL;
3195 asdl_seq_SET(args, nargs++, e);
3196 }
3197 else {
3198 /* a keyword argument */
3199 keyword_ty kw;
3200 identifier key, tmp;
3201 int k;
3202
3203 // To remain LL(1), the grammar accepts any test (basically, any
3204 // expression) in the keyword slot of a call site. So, we need
3205 // to manually enforce that the keyword is a NAME here.
3206 static const int name_tree[] = {
3207 test,
3208 or_test,
3209 and_test,
3210 not_test,
3211 comparison,
3212 expr,
3213 xor_expr,
3214 and_expr,
3215 shift_expr,
3216 arith_expr,
3217 term,
3218 factor,
3219 power,
3220 atom_expr,
3221 atom,
3222 0,
3223 };
3224 node *expr_node = chch;
3225 for (int i = 0; name_tree[i]; i++) {
3226 if (TYPE(expr_node) != name_tree[i])
3227 break;
3228 if (NCH(expr_node) != 1)
3229 break;
3230 expr_node = CHILD(expr_node, 0);
3231 }
3232 if (TYPE(expr_node) != NAME) {
3233 ast_error(c, chch,
3234 "expression cannot contain assignment, "
3235 "perhaps you meant \"==\"?");
3236 return NULL;
3237 }
3238 key = new_identifier(STR(expr_node), c);
3239 if (key == NULL) {
3240 return NULL;
3241 }
3242 if (forbidden_name(c, key, chch, 1)) {
3243 return NULL;
3244 }
3245 for (k = 0; k < nkeywords; k++) {
3246 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
3247 if (tmp && !PyUnicode_Compare(tmp, key)) {
3248 ast_error(c, chch,
3249 "keyword argument repeated");
3250 return NULL;
3251 }
3252 }
3253 e = ast_for_expr(c, CHILD(ch, 2));
3254 if (!e)
3255 return NULL;
3256 kw = keyword(key, e, c->c_arena);
3257 if (!kw)
3258 return NULL;
3259 asdl_seq_SET(keywords, nkeywords++, kw);
3260 }
3261 }
3262 }
3263
3264 return Call(func, args, keywords, LINENO(start), start->n_col_offset,
3265 closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
3266 }
3267
3268 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)3269 ast_for_testlist(struct compiling *c, const node* n)
3270 {
3271 /* testlist_comp: test (comp_for | (',' test)* [',']) */
3272 /* testlist: test (',' test)* [','] */
3273 assert(NCH(n) > 0);
3274 if (TYPE(n) == testlist_comp) {
3275 if (NCH(n) > 1)
3276 assert(TYPE(CHILD(n, 1)) != comp_for);
3277 }
3278 else {
3279 assert(TYPE(n) == testlist ||
3280 TYPE(n) == testlist_star_expr);
3281 }
3282 if (NCH(n) == 1)
3283 return ast_for_expr(c, CHILD(n, 0));
3284 else {
3285 asdl_seq *tmp = seq_for_testlist(c, n);
3286 if (!tmp)
3287 return NULL;
3288 return Tuple(tmp, Load, LINENO(n), n->n_col_offset,
3289 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3290 }
3291 }
3292
3293 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)3294 ast_for_expr_stmt(struct compiling *c, const node *n)
3295 {
3296 REQ(n, expr_stmt);
3297 /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
3298 [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
3299 annassign: ':' test ['=' (yield_expr|testlist)]
3300 testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
3301 augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
3302 '<<=' | '>>=' | '**=' | '//=')
3303 test: ... here starts the operator precedence dance
3304 */
3305 int num = NCH(n);
3306
3307 if (num == 1) {
3308 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
3309 if (!e)
3310 return NULL;
3311
3312 return Expr(e, LINENO(n), n->n_col_offset,
3313 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3314 }
3315 else if (TYPE(CHILD(n, 1)) == augassign) {
3316 expr_ty expr1, expr2;
3317 operator_ty newoperator;
3318 node *ch = CHILD(n, 0);
3319
3320 expr1 = ast_for_testlist(c, ch);
3321 if (!expr1)
3322 return NULL;
3323 if(!set_context(c, expr1, Store, ch))
3324 return NULL;
3325 /* set_context checks that most expressions are not the left side.
3326 Augmented assignments can only have a name, a subscript, or an
3327 attribute on the left, though, so we have to explicitly check for
3328 those. */
3329 switch (expr1->kind) {
3330 case Name_kind:
3331 case Attribute_kind:
3332 case Subscript_kind:
3333 break;
3334 default:
3335 ast_error(c, ch, "illegal expression for augmented assignment");
3336 return NULL;
3337 }
3338
3339 ch = CHILD(n, 2);
3340 if (TYPE(ch) == testlist)
3341 expr2 = ast_for_testlist(c, ch);
3342 else
3343 expr2 = ast_for_expr(c, ch);
3344 if (!expr2)
3345 return NULL;
3346
3347 newoperator = ast_for_augassign(c, CHILD(n, 1));
3348 if (!newoperator)
3349 return NULL;
3350
3351 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
3352 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3353 }
3354 else if (TYPE(CHILD(n, 1)) == annassign) {
3355 expr_ty expr1, expr2, expr3;
3356 node *ch = CHILD(n, 0);
3357 node *deep, *ann = CHILD(n, 1);
3358 int simple = 1;
3359
3360 /* AnnAssigns are only allowed in Python 3.6 or greater */
3361 if (c->c_feature_version < 6) {
3362 ast_error(c, ch,
3363 "Variable annotation syntax is only supported in Python 3.6 and greater");
3364 return NULL;
3365 }
3366
3367 /* we keep track of parens to qualify (x) as expression not name */
3368 deep = ch;
3369 while (NCH(deep) == 1) {
3370 deep = CHILD(deep, 0);
3371 }
3372 if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
3373 simple = 0;
3374 }
3375 expr1 = ast_for_testlist(c, ch);
3376 if (!expr1) {
3377 return NULL;
3378 }
3379 switch (expr1->kind) {
3380 case Name_kind:
3381 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
3382 return NULL;
3383 }
3384 expr1->v.Name.ctx = Store;
3385 break;
3386 case Attribute_kind:
3387 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
3388 return NULL;
3389 }
3390 expr1->v.Attribute.ctx = Store;
3391 break;
3392 case Subscript_kind:
3393 expr1->v.Subscript.ctx = Store;
3394 break;
3395 case List_kind:
3396 ast_error(c, ch,
3397 "only single target (not list) can be annotated");
3398 return NULL;
3399 case Tuple_kind:
3400 ast_error(c, ch,
3401 "only single target (not tuple) can be annotated");
3402 return NULL;
3403 default:
3404 ast_error(c, ch,
3405 "illegal target for annotation");
3406 return NULL;
3407 }
3408
3409 if (expr1->kind != Name_kind) {
3410 simple = 0;
3411 }
3412 ch = CHILD(ann, 1);
3413 expr2 = ast_for_expr(c, ch);
3414 if (!expr2) {
3415 return NULL;
3416 }
3417 if (NCH(ann) == 2) {
3418 return AnnAssign(expr1, expr2, NULL, simple,
3419 LINENO(n), n->n_col_offset,
3420 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3421 }
3422 else {
3423 ch = CHILD(ann, 3);
3424 if (TYPE(ch) == testlist_star_expr) {
3425 expr3 = ast_for_testlist(c, ch);
3426 }
3427 else {
3428 expr3 = ast_for_expr(c, ch);
3429 }
3430 if (!expr3) {
3431 return NULL;
3432 }
3433 return AnnAssign(expr1, expr2, expr3, simple,
3434 LINENO(n), n->n_col_offset,
3435 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3436 }
3437 }
3438 else {
3439 int i, nch_minus_type, has_type_comment;
3440 asdl_seq *targets;
3441 node *value;
3442 expr_ty expression;
3443 string type_comment;
3444
3445 /* a normal assignment */
3446 REQ(CHILD(n, 1), EQUAL);
3447
3448 has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
3449 nch_minus_type = num - has_type_comment;
3450
3451 targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
3452 if (!targets)
3453 return NULL;
3454 for (i = 0; i < nch_minus_type - 2; i += 2) {
3455 expr_ty e;
3456 node *ch = CHILD(n, i);
3457 if (TYPE(ch) == yield_expr) {
3458 ast_error(c, ch, "assignment to yield expression not possible");
3459 return NULL;
3460 }
3461 e = ast_for_testlist(c, ch);
3462 if (!e)
3463 return NULL;
3464
3465 /* set context to assign */
3466 if (!set_context(c, e, Store, CHILD(n, i)))
3467 return NULL;
3468
3469 asdl_seq_SET(targets, i / 2, e);
3470 }
3471 value = CHILD(n, nch_minus_type - 1);
3472 if (TYPE(value) == testlist_star_expr)
3473 expression = ast_for_testlist(c, value);
3474 else
3475 expression = ast_for_expr(c, value);
3476 if (!expression)
3477 return NULL;
3478 if (has_type_comment) {
3479 type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
3480 if (!type_comment)
3481 return NULL;
3482 }
3483 else
3484 type_comment = NULL;
3485 return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
3486 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3487 }
3488 }
3489
3490
3491 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3492 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3493 {
3494 asdl_seq *seq;
3495 int i;
3496 expr_ty e;
3497
3498 REQ(n, exprlist);
3499
3500 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3501 if (!seq)
3502 return NULL;
3503 for (i = 0; i < NCH(n); i += 2) {
3504 e = ast_for_expr(c, CHILD(n, i));
3505 if (!e)
3506 return NULL;
3507 asdl_seq_SET(seq, i / 2, e);
3508 if (context && !set_context(c, e, context, CHILD(n, i)))
3509 return NULL;
3510 }
3511 return seq;
3512 }
3513
3514 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3515 ast_for_del_stmt(struct compiling *c, const node *n)
3516 {
3517 asdl_seq *expr_list;
3518
3519 /* del_stmt: 'del' exprlist */
3520 REQ(n, del_stmt);
3521
3522 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3523 if (!expr_list)
3524 return NULL;
3525 return Delete(expr_list, LINENO(n), n->n_col_offset,
3526 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3527 }
3528
3529 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3530 ast_for_flow_stmt(struct compiling *c, const node *n)
3531 {
3532 /*
3533 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3534 | yield_stmt
3535 break_stmt: 'break'
3536 continue_stmt: 'continue'
3537 return_stmt: 'return' [testlist]
3538 yield_stmt: yield_expr
3539 yield_expr: 'yield' testlist | 'yield' 'from' test
3540 raise_stmt: 'raise' [test [',' test [',' test]]]
3541 */
3542 node *ch;
3543
3544 REQ(n, flow_stmt);
3545 ch = CHILD(n, 0);
3546 switch (TYPE(ch)) {
3547 case break_stmt:
3548 return Break(LINENO(n), n->n_col_offset,
3549 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3550 case continue_stmt:
3551 return Continue(LINENO(n), n->n_col_offset,
3552 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3553 case yield_stmt: { /* will reduce to yield_expr */
3554 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3555 if (!exp)
3556 return NULL;
3557 return Expr(exp, LINENO(n), n->n_col_offset,
3558 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3559 }
3560 case return_stmt:
3561 if (NCH(ch) == 1)
3562 return Return(NULL, LINENO(n), n->n_col_offset,
3563 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3564 else {
3565 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3566 if (!expression)
3567 return NULL;
3568 return Return(expression, LINENO(n), n->n_col_offset,
3569 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3570 }
3571 case raise_stmt:
3572 if (NCH(ch) == 1)
3573 return Raise(NULL, NULL, LINENO(n), n->n_col_offset,
3574 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3575 else if (NCH(ch) >= 2) {
3576 expr_ty cause = NULL;
3577 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3578 if (!expression)
3579 return NULL;
3580 if (NCH(ch) == 4) {
3581 cause = ast_for_expr(c, CHILD(ch, 3));
3582 if (!cause)
3583 return NULL;
3584 }
3585 return Raise(expression, cause, LINENO(n), n->n_col_offset,
3586 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3587 }
3588 /* fall through */
3589 default:
3590 PyErr_Format(PyExc_SystemError,
3591 "unexpected flow_stmt: %d", TYPE(ch));
3592 return NULL;
3593 }
3594 }
3595
3596 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3597 alias_for_import_name(struct compiling *c, const node *n, int store)
3598 {
3599 /*
3600 import_as_name: NAME ['as' NAME]
3601 dotted_as_name: dotted_name ['as' NAME]
3602 dotted_name: NAME ('.' NAME)*
3603 */
3604 identifier str, name;
3605
3606 loop:
3607 switch (TYPE(n)) {
3608 case import_as_name: {
3609 node *name_node = CHILD(n, 0);
3610 str = NULL;
3611 name = NEW_IDENTIFIER(name_node);
3612 if (!name)
3613 return NULL;
3614 if (NCH(n) == 3) {
3615 node *str_node = CHILD(n, 2);
3616 str = NEW_IDENTIFIER(str_node);
3617 if (!str)
3618 return NULL;
3619 if (store && forbidden_name(c, str, str_node, 0))
3620 return NULL;
3621 }
3622 else {
3623 if (forbidden_name(c, name, name_node, 0))
3624 return NULL;
3625 }
3626 return alias(name, str, c->c_arena);
3627 }
3628 case dotted_as_name:
3629 if (NCH(n) == 1) {
3630 n = CHILD(n, 0);
3631 goto loop;
3632 }
3633 else {
3634 node *asname_node = CHILD(n, 2);
3635 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3636 if (!a)
3637 return NULL;
3638 assert(!a->asname);
3639 a->asname = NEW_IDENTIFIER(asname_node);
3640 if (!a->asname)
3641 return NULL;
3642 if (forbidden_name(c, a->asname, asname_node, 0))
3643 return NULL;
3644 return a;
3645 }
3646 case dotted_name:
3647 if (NCH(n) == 1) {
3648 node *name_node = CHILD(n, 0);
3649 name = NEW_IDENTIFIER(name_node);
3650 if (!name)
3651 return NULL;
3652 if (store && forbidden_name(c, name, name_node, 0))
3653 return NULL;
3654 return alias(name, NULL, c->c_arena);
3655 }
3656 else {
3657 /* Create a string of the form "a.b.c" */
3658 int i;
3659 size_t len;
3660 char *s;
3661 PyObject *uni;
3662
3663 len = 0;
3664 for (i = 0; i < NCH(n); i += 2)
3665 /* length of string plus one for the dot */
3666 len += strlen(STR(CHILD(n, i))) + 1;
3667 len--; /* the last name doesn't have a dot */
3668 str = PyBytes_FromStringAndSize(NULL, len);
3669 if (!str)
3670 return NULL;
3671 s = PyBytes_AS_STRING(str);
3672 if (!s)
3673 return NULL;
3674 for (i = 0; i < NCH(n); i += 2) {
3675 char *sch = STR(CHILD(n, i));
3676 strcpy(s, STR(CHILD(n, i)));
3677 s += strlen(sch);
3678 *s++ = '.';
3679 }
3680 --s;
3681 *s = '\0';
3682 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3683 PyBytes_GET_SIZE(str),
3684 NULL);
3685 Py_DECREF(str);
3686 if (!uni)
3687 return NULL;
3688 str = uni;
3689 PyUnicode_InternInPlace(&str);
3690 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3691 Py_DECREF(str);
3692 return NULL;
3693 }
3694 return alias(str, NULL, c->c_arena);
3695 }
3696 case STAR:
3697 str = PyUnicode_InternFromString("*");
3698 if (!str)
3699 return NULL;
3700 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3701 Py_DECREF(str);
3702 return NULL;
3703 }
3704 return alias(str, NULL, c->c_arena);
3705 default:
3706 PyErr_Format(PyExc_SystemError,
3707 "unexpected import name: %d", TYPE(n));
3708 return NULL;
3709 }
3710
3711 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3712 return NULL;
3713 }
3714
3715 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3716 ast_for_import_stmt(struct compiling *c, const node *n)
3717 {
3718 /*
3719 import_stmt: import_name | import_from
3720 import_name: 'import' dotted_as_names
3721 import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3722 'import' ('*' | '(' import_as_names ')' | import_as_names)
3723 */
3724 int lineno;
3725 int col_offset;
3726 int i;
3727 asdl_seq *aliases;
3728
3729 REQ(n, import_stmt);
3730 lineno = LINENO(n);
3731 col_offset = n->n_col_offset;
3732 n = CHILD(n, 0);
3733 if (TYPE(n) == import_name) {
3734 n = CHILD(n, 1);
3735 REQ(n, dotted_as_names);
3736 aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3737 if (!aliases)
3738 return NULL;
3739 for (i = 0; i < NCH(n); i += 2) {
3740 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3741 if (!import_alias)
3742 return NULL;
3743 asdl_seq_SET(aliases, i / 2, import_alias);
3744 }
3745 // Even though n is modified above, the end position is not changed
3746 return Import(aliases, lineno, col_offset,
3747 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3748 }
3749 else if (TYPE(n) == import_from) {
3750 int n_children;
3751 int idx, ndots = 0;
3752 const node *n_copy = n;
3753 alias_ty mod = NULL;
3754 identifier modname = NULL;
3755
3756 /* Count the number of dots (for relative imports) and check for the
3757 optional module name */
3758 for (idx = 1; idx < NCH(n); idx++) {
3759 if (TYPE(CHILD(n, idx)) == dotted_name) {
3760 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3761 if (!mod)
3762 return NULL;
3763 idx++;
3764 break;
3765 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3766 /* three consecutive dots are tokenized as one ELLIPSIS */
3767 ndots += 3;
3768 continue;
3769 } else if (TYPE(CHILD(n, idx)) != DOT) {
3770 break;
3771 }
3772 ndots++;
3773 }
3774 idx++; /* skip over the 'import' keyword */
3775 switch (TYPE(CHILD(n, idx))) {
3776 case STAR:
3777 /* from ... import * */
3778 n = CHILD(n, idx);
3779 n_children = 1;
3780 break;
3781 case LPAR:
3782 /* from ... import (x, y, z) */
3783 n = CHILD(n, idx + 1);
3784 n_children = NCH(n);
3785 break;
3786 case import_as_names:
3787 /* from ... import x, y, z */
3788 n = CHILD(n, idx);
3789 n_children = NCH(n);
3790 if (n_children % 2 == 0) {
3791 ast_error(c, n,
3792 "trailing comma not allowed without"
3793 " surrounding parentheses");
3794 return NULL;
3795 }
3796 break;
3797 default:
3798 ast_error(c, n, "Unexpected node-type in from-import");
3799 return NULL;
3800 }
3801
3802 aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3803 if (!aliases)
3804 return NULL;
3805
3806 /* handle "from ... import *" special b/c there's no children */
3807 if (TYPE(n) == STAR) {
3808 alias_ty import_alias = alias_for_import_name(c, n, 1);
3809 if (!import_alias)
3810 return NULL;
3811 asdl_seq_SET(aliases, 0, import_alias);
3812 }
3813 else {
3814 for (i = 0; i < NCH(n); i += 2) {
3815 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3816 if (!import_alias)
3817 return NULL;
3818 asdl_seq_SET(aliases, i / 2, import_alias);
3819 }
3820 }
3821 if (mod != NULL)
3822 modname = mod->name;
3823 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3824 n_copy->n_end_lineno, n_copy->n_end_col_offset,
3825 c->c_arena);
3826 }
3827 PyErr_Format(PyExc_SystemError,
3828 "unknown import statement: starts with command '%s'",
3829 STR(CHILD(n, 0)));
3830 return NULL;
3831 }
3832
3833 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3834 ast_for_global_stmt(struct compiling *c, const node *n)
3835 {
3836 /* global_stmt: 'global' NAME (',' NAME)* */
3837 identifier name;
3838 asdl_seq *s;
3839 int i;
3840
3841 REQ(n, global_stmt);
3842 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3843 if (!s)
3844 return NULL;
3845 for (i = 1; i < NCH(n); i += 2) {
3846 name = NEW_IDENTIFIER(CHILD(n, i));
3847 if (!name)
3848 return NULL;
3849 asdl_seq_SET(s, i / 2, name);
3850 }
3851 return Global(s, LINENO(n), n->n_col_offset,
3852 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3853 }
3854
3855 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3856 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3857 {
3858 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3859 identifier name;
3860 asdl_seq *s;
3861 int i;
3862
3863 REQ(n, nonlocal_stmt);
3864 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3865 if (!s)
3866 return NULL;
3867 for (i = 1; i < NCH(n); i += 2) {
3868 name = NEW_IDENTIFIER(CHILD(n, i));
3869 if (!name)
3870 return NULL;
3871 asdl_seq_SET(s, i / 2, name);
3872 }
3873 return Nonlocal(s, LINENO(n), n->n_col_offset,
3874 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3875 }
3876
3877 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3878 ast_for_assert_stmt(struct compiling *c, const node *n)
3879 {
3880 /* assert_stmt: 'assert' test [',' test] */
3881 REQ(n, assert_stmt);
3882 if (NCH(n) == 2) {
3883 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3884 if (!expression)
3885 return NULL;
3886 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
3887 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3888 }
3889 else if (NCH(n) == 4) {
3890 expr_ty expr1, expr2;
3891
3892 expr1 = ast_for_expr(c, CHILD(n, 1));
3893 if (!expr1)
3894 return NULL;
3895 expr2 = ast_for_expr(c, CHILD(n, 3));
3896 if (!expr2)
3897 return NULL;
3898
3899 return Assert(expr1, expr2, LINENO(n), n->n_col_offset,
3900 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
3901 }
3902 PyErr_Format(PyExc_SystemError,
3903 "improper number of parts to 'assert' statement: %d",
3904 NCH(n));
3905 return NULL;
3906 }
3907
3908 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3909 ast_for_suite(struct compiling *c, const node *n)
3910 {
3911 /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
3912 asdl_seq *seq;
3913 stmt_ty s;
3914 int i, total, num, end, pos = 0;
3915 node *ch;
3916
3917 if (TYPE(n) != func_body_suite) {
3918 REQ(n, suite);
3919 }
3920
3921 total = num_stmts(n);
3922 seq = _Py_asdl_seq_new(total, c->c_arena);
3923 if (!seq)
3924 return NULL;
3925 if (TYPE(CHILD(n, 0)) == simple_stmt) {
3926 n = CHILD(n, 0);
3927 /* simple_stmt always ends with a NEWLINE,
3928 and may have a trailing SEMI
3929 */
3930 end = NCH(n) - 1;
3931 if (TYPE(CHILD(n, end - 1)) == SEMI)
3932 end--;
3933 /* loop by 2 to skip semi-colons */
3934 for (i = 0; i < end; i += 2) {
3935 ch = CHILD(n, i);
3936 s = ast_for_stmt(c, ch);
3937 if (!s)
3938 return NULL;
3939 asdl_seq_SET(seq, pos++, s);
3940 }
3941 }
3942 else {
3943 i = 2;
3944 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
3945 i += 2;
3946 REQ(CHILD(n, 2), NEWLINE);
3947 }
3948
3949 for (; i < (NCH(n) - 1); i++) {
3950 ch = CHILD(n, i);
3951 REQ(ch, stmt);
3952 num = num_stmts(ch);
3953 if (num == 1) {
3954 /* small_stmt or compound_stmt with only one child */
3955 s = ast_for_stmt(c, ch);
3956 if (!s)
3957 return NULL;
3958 asdl_seq_SET(seq, pos++, s);
3959 }
3960 else {
3961 int j;
3962 ch = CHILD(ch, 0);
3963 REQ(ch, simple_stmt);
3964 for (j = 0; j < NCH(ch); j += 2) {
3965 /* statement terminates with a semi-colon ';' */
3966 if (NCH(CHILD(ch, j)) == 0) {
3967 assert((j + 1) == NCH(ch));
3968 break;
3969 }
3970 s = ast_for_stmt(c, CHILD(ch, j));
3971 if (!s)
3972 return NULL;
3973 asdl_seq_SET(seq, pos++, s);
3974 }
3975 }
3976 }
3977 }
3978 assert(pos == seq->size);
3979 return seq;
3980 }
3981
3982 static void
get_last_end_pos(asdl_seq * s,int * end_lineno,int * end_col_offset)3983 get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
3984 {
3985 Py_ssize_t tot = asdl_seq_LEN(s);
3986 // There must be no empty suites.
3987 assert(tot > 0);
3988 stmt_ty last = asdl_seq_GET(s, tot - 1);
3989 *end_lineno = last->end_lineno;
3990 *end_col_offset = last->end_col_offset;
3991 }
3992
3993 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3994 ast_for_if_stmt(struct compiling *c, const node *n)
3995 {
3996 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3997 ['else' ':' suite]
3998 */
3999 char *s;
4000 int end_lineno, end_col_offset;
4001
4002 REQ(n, if_stmt);
4003
4004 if (NCH(n) == 4) {
4005 expr_ty expression;
4006 asdl_seq *suite_seq;
4007
4008 expression = ast_for_expr(c, CHILD(n, 1));
4009 if (!expression)
4010 return NULL;
4011 suite_seq = ast_for_suite(c, CHILD(n, 3));
4012 if (!suite_seq)
4013 return NULL;
4014 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4015
4016 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4017 end_lineno, end_col_offset, c->c_arena);
4018 }
4019
4020 s = STR(CHILD(n, 4));
4021 /* s[2], the third character in the string, will be
4022 's' for el_s_e, or
4023 'i' for el_i_f
4024 */
4025 if (s[2] == 's') {
4026 expr_ty expression;
4027 asdl_seq *seq1, *seq2;
4028
4029 expression = ast_for_expr(c, CHILD(n, 1));
4030 if (!expression)
4031 return NULL;
4032 seq1 = ast_for_suite(c, CHILD(n, 3));
4033 if (!seq1)
4034 return NULL;
4035 seq2 = ast_for_suite(c, CHILD(n, 6));
4036 if (!seq2)
4037 return NULL;
4038 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4039
4040 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4041 end_lineno, end_col_offset, c->c_arena);
4042 }
4043 else if (s[2] == 'i') {
4044 int i, n_elif, has_else = 0;
4045 expr_ty expression;
4046 asdl_seq *suite_seq;
4047 asdl_seq *orelse = NULL;
4048 n_elif = NCH(n) - 4;
4049 /* must reference the child n_elif+1 since 'else' token is third,
4050 not fourth, child from the end. */
4051 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
4052 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
4053 has_else = 1;
4054 n_elif -= 3;
4055 }
4056 n_elif /= 4;
4057
4058 if (has_else) {
4059 asdl_seq *suite_seq2;
4060
4061 orelse = _Py_asdl_seq_new(1, c->c_arena);
4062 if (!orelse)
4063 return NULL;
4064 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
4065 if (!expression)
4066 return NULL;
4067 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
4068 if (!suite_seq)
4069 return NULL;
4070 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4071 if (!suite_seq2)
4072 return NULL;
4073 get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
4074
4075 asdl_seq_SET(orelse, 0,
4076 If(expression, suite_seq, suite_seq2,
4077 LINENO(CHILD(n, NCH(n) - 7)),
4078 CHILD(n, NCH(n) - 7)->n_col_offset,
4079 end_lineno, end_col_offset, c->c_arena));
4080 /* the just-created orelse handled the last elif */
4081 n_elif--;
4082 }
4083
4084 for (i = 0; i < n_elif; i++) {
4085 int off = 5 + (n_elif - i - 1) * 4;
4086 asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
4087 if (!newobj)
4088 return NULL;
4089 expression = ast_for_expr(c, CHILD(n, off));
4090 if (!expression)
4091 return NULL;
4092 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
4093 if (!suite_seq)
4094 return NULL;
4095
4096 if (orelse != NULL) {
4097 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4098 } else {
4099 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4100 }
4101 asdl_seq_SET(newobj, 0,
4102 If(expression, suite_seq, orelse,
4103 LINENO(CHILD(n, off - 1)),
4104 CHILD(n, off - 1)->n_col_offset,
4105 end_lineno, end_col_offset, c->c_arena));
4106 orelse = newobj;
4107 }
4108 expression = ast_for_expr(c, CHILD(n, 1));
4109 if (!expression)
4110 return NULL;
4111 suite_seq = ast_for_suite(c, CHILD(n, 3));
4112 if (!suite_seq)
4113 return NULL;
4114 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4115 return If(expression, suite_seq, orelse,
4116 LINENO(n), n->n_col_offset,
4117 end_lineno, end_col_offset, c->c_arena);
4118 }
4119
4120 PyErr_Format(PyExc_SystemError,
4121 "unexpected token in 'if' statement: %s", s);
4122 return NULL;
4123 }
4124
4125 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)4126 ast_for_while_stmt(struct compiling *c, const node *n)
4127 {
4128 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
4129 REQ(n, while_stmt);
4130 int end_lineno, end_col_offset;
4131
4132 if (NCH(n) == 4) {
4133 expr_ty expression;
4134 asdl_seq *suite_seq;
4135
4136 expression = ast_for_expr(c, CHILD(n, 1));
4137 if (!expression)
4138 return NULL;
4139 suite_seq = ast_for_suite(c, CHILD(n, 3));
4140 if (!suite_seq)
4141 return NULL;
4142 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4143 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
4144 end_lineno, end_col_offset, c->c_arena);
4145 }
4146 else if (NCH(n) == 7) {
4147 expr_ty expression;
4148 asdl_seq *seq1, *seq2;
4149
4150 expression = ast_for_expr(c, CHILD(n, 1));
4151 if (!expression)
4152 return NULL;
4153 seq1 = ast_for_suite(c, CHILD(n, 3));
4154 if (!seq1)
4155 return NULL;
4156 seq2 = ast_for_suite(c, CHILD(n, 6));
4157 if (!seq2)
4158 return NULL;
4159 get_last_end_pos(seq2, &end_lineno, &end_col_offset);
4160
4161 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
4162 end_lineno, end_col_offset, c->c_arena);
4163 }
4164
4165 PyErr_Format(PyExc_SystemError,
4166 "wrong number of tokens for 'while' statement: %d",
4167 NCH(n));
4168 return NULL;
4169 }
4170
4171 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)4172 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
4173 {
4174 const node * const n = is_async ? CHILD(n0, 1) : n0;
4175 asdl_seq *_target, *seq = NULL, *suite_seq;
4176 expr_ty expression;
4177 expr_ty target, first;
4178 const node *node_target;
4179 int end_lineno, end_col_offset;
4180 int has_type_comment;
4181 string type_comment;
4182
4183 if (is_async && c->c_feature_version < 5) {
4184 ast_error(c, n,
4185 "Async for loops are only supported in Python 3.5 and greater");
4186 return NULL;
4187 }
4188
4189 /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
4190 REQ(n, for_stmt);
4191
4192 has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
4193
4194 if (NCH(n) == 9 + has_type_comment) {
4195 seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
4196 if (!seq)
4197 return NULL;
4198 }
4199
4200 node_target = CHILD(n, 1);
4201 _target = ast_for_exprlist(c, node_target, Store);
4202 if (!_target)
4203 return NULL;
4204 /* Check the # of children rather than the length of _target, since
4205 for x, in ... has 1 element in _target, but still requires a Tuple. */
4206 first = (expr_ty)asdl_seq_GET(_target, 0);
4207 if (NCH(node_target) == 1)
4208 target = first;
4209 else
4210 target = Tuple(_target, Store, first->lineno, first->col_offset,
4211 node_target->n_end_lineno, node_target->n_end_col_offset,
4212 c->c_arena);
4213
4214 expression = ast_for_testlist(c, CHILD(n, 3));
4215 if (!expression)
4216 return NULL;
4217 suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
4218 if (!suite_seq)
4219 return NULL;
4220
4221 if (seq != NULL) {
4222 get_last_end_pos(seq, &end_lineno, &end_col_offset);
4223 } else {
4224 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4225 }
4226
4227 if (has_type_comment) {
4228 type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
4229 if (!type_comment)
4230 return NULL;
4231 }
4232 else
4233 type_comment = NULL;
4234
4235 if (is_async)
4236 return AsyncFor(target, expression, suite_seq, seq, type_comment,
4237 LINENO(n0), n0->n_col_offset,
4238 end_lineno, end_col_offset, c->c_arena);
4239 else
4240 return For(target, expression, suite_seq, seq, type_comment,
4241 LINENO(n), n->n_col_offset,
4242 end_lineno, end_col_offset, c->c_arena);
4243 }
4244
4245 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)4246 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
4247 {
4248 /* except_clause: 'except' [test ['as' test]] */
4249 int end_lineno, end_col_offset;
4250 REQ(exc, except_clause);
4251 REQ(body, suite);
4252
4253 if (NCH(exc) == 1) {
4254 asdl_seq *suite_seq = ast_for_suite(c, body);
4255 if (!suite_seq)
4256 return NULL;
4257 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4258
4259 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
4260 exc->n_col_offset,
4261 end_lineno, end_col_offset, c->c_arena);
4262 }
4263 else if (NCH(exc) == 2) {
4264 expr_ty expression;
4265 asdl_seq *suite_seq;
4266
4267 expression = ast_for_expr(c, CHILD(exc, 1));
4268 if (!expression)
4269 return NULL;
4270 suite_seq = ast_for_suite(c, body);
4271 if (!suite_seq)
4272 return NULL;
4273 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4274
4275 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
4276 exc->n_col_offset,
4277 end_lineno, end_col_offset, c->c_arena);
4278 }
4279 else if (NCH(exc) == 4) {
4280 asdl_seq *suite_seq;
4281 expr_ty expression;
4282 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
4283 if (!e)
4284 return NULL;
4285 if (forbidden_name(c, e, CHILD(exc, 3), 0))
4286 return NULL;
4287 expression = ast_for_expr(c, CHILD(exc, 1));
4288 if (!expression)
4289 return NULL;
4290 suite_seq = ast_for_suite(c, body);
4291 if (!suite_seq)
4292 return NULL;
4293 get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
4294
4295 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
4296 exc->n_col_offset,
4297 end_lineno, end_col_offset, c->c_arena);
4298 }
4299
4300 PyErr_Format(PyExc_SystemError,
4301 "wrong number of children for 'except' clause: %d",
4302 NCH(exc));
4303 return NULL;
4304 }
4305
4306 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)4307 ast_for_try_stmt(struct compiling *c, const node *n)
4308 {
4309 const int nch = NCH(n);
4310 int end_lineno, end_col_offset, n_except = (nch - 3)/3;
4311 asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
4312 excepthandler_ty last_handler;
4313
4314 REQ(n, try_stmt);
4315
4316 body = ast_for_suite(c, CHILD(n, 2));
4317 if (body == NULL)
4318 return NULL;
4319
4320 if (TYPE(CHILD(n, nch - 3)) == NAME) {
4321 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
4322 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
4323 /* we can assume it's an "else",
4324 because nch >= 9 for try-else-finally and
4325 it would otherwise have a type of except_clause */
4326 orelse = ast_for_suite(c, CHILD(n, nch - 4));
4327 if (orelse == NULL)
4328 return NULL;
4329 n_except--;
4330 }
4331
4332 finally = ast_for_suite(c, CHILD(n, nch - 1));
4333 if (finally == NULL)
4334 return NULL;
4335 n_except--;
4336 }
4337 else {
4338 /* we can assume it's an "else",
4339 otherwise it would have a type of except_clause */
4340 orelse = ast_for_suite(c, CHILD(n, nch - 1));
4341 if (orelse == NULL)
4342 return NULL;
4343 n_except--;
4344 }
4345 }
4346 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
4347 ast_error(c, n, "malformed 'try' statement");
4348 return NULL;
4349 }
4350
4351 if (n_except > 0) {
4352 int i;
4353 /* process except statements to create a try ... except */
4354 handlers = _Py_asdl_seq_new(n_except, c->c_arena);
4355 if (handlers == NULL)
4356 return NULL;
4357
4358 for (i = 0; i < n_except; i++) {
4359 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
4360 CHILD(n, 5 + i * 3));
4361 if (!e)
4362 return NULL;
4363 asdl_seq_SET(handlers, i, e);
4364 }
4365 }
4366
4367 assert(finally != NULL || asdl_seq_LEN(handlers));
4368 if (finally != NULL) {
4369 // finally is always last
4370 get_last_end_pos(finally, &end_lineno, &end_col_offset);
4371 } else if (orelse != NULL) {
4372 // otherwise else is last
4373 get_last_end_pos(orelse, &end_lineno, &end_col_offset);
4374 } else {
4375 // inline the get_last_end_pos logic due to layout mismatch
4376 last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
4377 end_lineno = last_handler->end_lineno;
4378 end_col_offset = last_handler->end_col_offset;
4379 }
4380 return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
4381 end_lineno, end_col_offset, c->c_arena);
4382 }
4383
4384 /* with_item: test ['as' expr] */
4385 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)4386 ast_for_with_item(struct compiling *c, const node *n)
4387 {
4388 expr_ty context_expr, optional_vars = NULL;
4389
4390 REQ(n, with_item);
4391 context_expr = ast_for_expr(c, CHILD(n, 0));
4392 if (!context_expr)
4393 return NULL;
4394 if (NCH(n) == 3) {
4395 optional_vars = ast_for_expr(c, CHILD(n, 2));
4396
4397 if (!optional_vars) {
4398 return NULL;
4399 }
4400 if (!set_context(c, optional_vars, Store, n)) {
4401 return NULL;
4402 }
4403 }
4404
4405 return withitem(context_expr, optional_vars, c->c_arena);
4406 }
4407
4408 /* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */
4409 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)4410 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
4411 {
4412 const node * const n = is_async ? CHILD(n0, 1) : n0;
4413 int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
4414 asdl_seq *items, *body;
4415 string type_comment;
4416
4417 if (is_async && c->c_feature_version < 5) {
4418 ast_error(c, n,
4419 "Async with statements are only supported in Python 3.5 and greater");
4420 return NULL;
4421 }
4422
4423 REQ(n, with_stmt);
4424
4425 has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
4426 nch_minus_type = NCH(n) - has_type_comment;
4427
4428 n_items = (nch_minus_type - 2) / 2;
4429 items = _Py_asdl_seq_new(n_items, c->c_arena);
4430 if (!items)
4431 return NULL;
4432 for (i = 1; i < nch_minus_type - 2; i += 2) {
4433 withitem_ty item = ast_for_with_item(c, CHILD(n, i));
4434 if (!item)
4435 return NULL;
4436 asdl_seq_SET(items, (i - 1) / 2, item);
4437 }
4438
4439 body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
4440 if (!body)
4441 return NULL;
4442 get_last_end_pos(body, &end_lineno, &end_col_offset);
4443
4444 if (has_type_comment) {
4445 type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
4446 if (!type_comment)
4447 return NULL;
4448 }
4449 else
4450 type_comment = NULL;
4451
4452 if (is_async)
4453 return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
4454 end_lineno, end_col_offset, c->c_arena);
4455 else
4456 return With(items, body, type_comment, LINENO(n), n->n_col_offset,
4457 end_lineno, end_col_offset, c->c_arena);
4458 }
4459
4460 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)4461 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
4462 {
4463 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
4464 PyObject *classname;
4465 asdl_seq *s;
4466 expr_ty call;
4467 int end_lineno, end_col_offset;
4468
4469 REQ(n, classdef);
4470
4471 if (NCH(n) == 4) { /* class NAME ':' suite */
4472 s = ast_for_suite(c, CHILD(n, 3));
4473 if (!s)
4474 return NULL;
4475 get_last_end_pos(s, &end_lineno, &end_col_offset);
4476
4477 classname = NEW_IDENTIFIER(CHILD(n, 1));
4478 if (!classname)
4479 return NULL;
4480 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4481 return NULL;
4482 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4483 LINENO(n), n->n_col_offset,
4484 end_lineno, end_col_offset, c->c_arena);
4485 }
4486
4487 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
4488 s = ast_for_suite(c, CHILD(n, 5));
4489 if (!s)
4490 return NULL;
4491 get_last_end_pos(s, &end_lineno, &end_col_offset);
4492
4493 classname = NEW_IDENTIFIER(CHILD(n, 1));
4494 if (!classname)
4495 return NULL;
4496 if (forbidden_name(c, classname, CHILD(n, 3), 0))
4497 return NULL;
4498 return ClassDef(classname, NULL, NULL, s, decorator_seq,
4499 LINENO(n), n->n_col_offset,
4500 end_lineno, end_col_offset, c->c_arena);
4501 }
4502
4503 /* class NAME '(' arglist ')' ':' suite */
4504 /* build up a fake Call node so we can extract its pieces */
4505 {
4506 PyObject *dummy_name;
4507 expr_ty dummy;
4508 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
4509 if (!dummy_name)
4510 return NULL;
4511 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
4512 CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
4513 c->c_arena);
4514 call = ast_for_call(c, CHILD(n, 3), dummy,
4515 CHILD(n, 1), NULL, CHILD(n, 4));
4516 if (!call)
4517 return NULL;
4518 }
4519 s = ast_for_suite(c, CHILD(n, 6));
4520 if (!s)
4521 return NULL;
4522 get_last_end_pos(s, &end_lineno, &end_col_offset);
4523
4524 classname = NEW_IDENTIFIER(CHILD(n, 1));
4525 if (!classname)
4526 return NULL;
4527 if (forbidden_name(c, classname, CHILD(n, 1), 0))
4528 return NULL;
4529
4530 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
4531 decorator_seq, LINENO(n), n->n_col_offset,
4532 end_lineno, end_col_offset, c->c_arena);
4533 }
4534
4535 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)4536 ast_for_stmt(struct compiling *c, const node *n)
4537 {
4538 if (TYPE(n) == stmt) {
4539 assert(NCH(n) == 1);
4540 n = CHILD(n, 0);
4541 }
4542 if (TYPE(n) == simple_stmt) {
4543 assert(num_stmts(n) == 1);
4544 n = CHILD(n, 0);
4545 }
4546 if (TYPE(n) == small_stmt) {
4547 n = CHILD(n, 0);
4548 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
4549 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
4550 */
4551 switch (TYPE(n)) {
4552 case expr_stmt:
4553 return ast_for_expr_stmt(c, n);
4554 case del_stmt:
4555 return ast_for_del_stmt(c, n);
4556 case pass_stmt:
4557 return Pass(LINENO(n), n->n_col_offset,
4558 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
4559 case flow_stmt:
4560 return ast_for_flow_stmt(c, n);
4561 case import_stmt:
4562 return ast_for_import_stmt(c, n);
4563 case global_stmt:
4564 return ast_for_global_stmt(c, n);
4565 case nonlocal_stmt:
4566 return ast_for_nonlocal_stmt(c, n);
4567 case assert_stmt:
4568 return ast_for_assert_stmt(c, n);
4569 default:
4570 PyErr_Format(PyExc_SystemError,
4571 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4572 TYPE(n), NCH(n));
4573 return NULL;
4574 }
4575 }
4576 else {
4577 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4578 | funcdef | classdef | decorated | async_stmt
4579 */
4580 node *ch = CHILD(n, 0);
4581 REQ(n, compound_stmt);
4582 switch (TYPE(ch)) {
4583 case if_stmt:
4584 return ast_for_if_stmt(c, ch);
4585 case while_stmt:
4586 return ast_for_while_stmt(c, ch);
4587 case for_stmt:
4588 return ast_for_for_stmt(c, ch, 0);
4589 case try_stmt:
4590 return ast_for_try_stmt(c, ch);
4591 case with_stmt:
4592 return ast_for_with_stmt(c, ch, 0);
4593 case funcdef:
4594 return ast_for_funcdef(c, ch, NULL);
4595 case classdef:
4596 return ast_for_classdef(c, ch, NULL);
4597 case decorated:
4598 return ast_for_decorated(c, ch);
4599 case async_stmt:
4600 return ast_for_async_stmt(c, ch);
4601 default:
4602 PyErr_Format(PyExc_SystemError,
4603 "unhandled compound_stmt: TYPE=%d NCH=%d\n",
4604 TYPE(n), NCH(n));
4605 return NULL;
4606 }
4607 }
4608 }
4609
4610 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4611 parsenumber_raw(struct compiling *c, const char *s)
4612 {
4613 const char *end;
4614 long x;
4615 double dx;
4616 Py_complex compl;
4617 int imflag;
4618
4619 assert(s != NULL);
4620 errno = 0;
4621 end = s + strlen(s) - 1;
4622 imflag = *end == 'j' || *end == 'J';
4623 if (s[0] == '0') {
4624 x = (long) PyOS_strtoul(s, (char **)&end, 0);
4625 if (x < 0 && errno == 0) {
4626 return PyLong_FromString(s, (char **)0, 0);
4627 }
4628 }
4629 else
4630 x = PyOS_strtol(s, (char **)&end, 0);
4631 if (*end == '\0') {
4632 if (errno != 0)
4633 return PyLong_FromString(s, (char **)0, 0);
4634 return PyLong_FromLong(x);
4635 }
4636 /* XXX Huge floats may silently fail */
4637 if (imflag) {
4638 compl.real = 0.;
4639 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4640 if (compl.imag == -1.0 && PyErr_Occurred())
4641 return NULL;
4642 return PyComplex_FromCComplex(compl);
4643 }
4644 else
4645 {
4646 dx = PyOS_string_to_double(s, NULL, NULL);
4647 if (dx == -1.0 && PyErr_Occurred())
4648 return NULL;
4649 return PyFloat_FromDouble(dx);
4650 }
4651 }
4652
4653 static PyObject *
parsenumber(struct compiling * c,const char * s)4654 parsenumber(struct compiling *c, const char *s)
4655 {
4656 char *dup, *end;
4657 PyObject *res = NULL;
4658
4659 assert(s != NULL);
4660
4661 if (strchr(s, '_') == NULL) {
4662 return parsenumber_raw(c, s);
4663 }
4664 /* Create a duplicate without underscores. */
4665 dup = PyMem_Malloc(strlen(s) + 1);
4666 if (dup == NULL) {
4667 return PyErr_NoMemory();
4668 }
4669 end = dup;
4670 for (; *s; s++) {
4671 if (*s != '_') {
4672 *end++ = *s;
4673 }
4674 }
4675 *end = '\0';
4676 res = parsenumber_raw(c, dup);
4677 PyMem_Free(dup);
4678 return res;
4679 }
4680
4681 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4682 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4683 {
4684 const char *s, *t;
4685 t = s = *sPtr;
4686 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4687 while (s < end && (*s & 0x80)) s++;
4688 *sPtr = s;
4689 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4690 }
4691
4692 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4693 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4694 unsigned char first_invalid_escape_char)
4695 {
4696 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4697 first_invalid_escape_char);
4698 if (msg == NULL) {
4699 return -1;
4700 }
4701 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4702 c->c_filename, LINENO(n),
4703 NULL, NULL) < 0)
4704 {
4705 if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4706 /* Replace the DeprecationWarning exception with a SyntaxError
4707 to get a more accurate error report */
4708 PyErr_Clear();
4709 ast_error(c, n, "%U", msg);
4710 }
4711 Py_DECREF(msg);
4712 return -1;
4713 }
4714 Py_DECREF(msg);
4715 return 0;
4716 }
4717
4718 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4719 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4720 size_t len)
4721 {
4722 PyObject *v, *u;
4723 char *buf;
4724 char *p;
4725 const char *end;
4726
4727 /* check for integer overflow */
4728 if (len > SIZE_MAX / 6)
4729 return NULL;
4730 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4731 "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4732 u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4733 if (u == NULL)
4734 return NULL;
4735 p = buf = PyBytes_AsString(u);
4736 end = s + len;
4737 while (s < end) {
4738 if (*s == '\\') {
4739 *p++ = *s++;
4740 if (s >= end || *s & 0x80) {
4741 strcpy(p, "u005c");
4742 p += 5;
4743 if (s >= end)
4744 break;
4745 }
4746 }
4747 if (*s & 0x80) { /* XXX inefficient */
4748 PyObject *w;
4749 int kind;
4750 void *data;
4751 Py_ssize_t len, i;
4752 w = decode_utf8(c, &s, end);
4753 if (w == NULL) {
4754 Py_DECREF(u);
4755 return NULL;
4756 }
4757 kind = PyUnicode_KIND(w);
4758 data = PyUnicode_DATA(w);
4759 len = PyUnicode_GET_LENGTH(w);
4760 for (i = 0; i < len; i++) {
4761 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4762 sprintf(p, "\\U%08x", chr);
4763 p += 10;
4764 }
4765 /* Should be impossible to overflow */
4766 assert(p - buf <= PyBytes_GET_SIZE(u));
4767 Py_DECREF(w);
4768 } else {
4769 *p++ = *s++;
4770 }
4771 }
4772 len = p - buf;
4773 s = buf;
4774
4775 const char *first_invalid_escape;
4776 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4777
4778 if (v != NULL && first_invalid_escape != NULL) {
4779 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4780 /* We have not decref u before because first_invalid_escape points
4781 inside u. */
4782 Py_XDECREF(u);
4783 Py_DECREF(v);
4784 return NULL;
4785 }
4786 }
4787 Py_XDECREF(u);
4788 return v;
4789 }
4790
4791 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4792 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4793 size_t len)
4794 {
4795 const char *first_invalid_escape;
4796 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4797 &first_invalid_escape);
4798 if (result == NULL)
4799 return NULL;
4800
4801 if (first_invalid_escape != NULL) {
4802 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4803 Py_DECREF(result);
4804 return NULL;
4805 }
4806 }
4807 return result;
4808 }
4809
4810 /* Shift locations for the given node and all its children by adding `lineno`
4811 and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4812 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4813 {
4814 n->n_col_offset = n->n_col_offset + col_offset;
4815 n->n_end_col_offset = n->n_end_col_offset + col_offset;
4816 for (int i = 0; i < NCH(n); ++i) {
4817 if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4818 /* Shifting column offsets unnecessary if there's been newlines. */
4819 col_offset = 0;
4820 }
4821 fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4822 }
4823 n->n_lineno = n->n_lineno + lineno;
4824 n->n_end_lineno = n->n_end_lineno + lineno;
4825 }
4826
4827 /* Fix locations for the given node and its children.
4828
4829 `parent` is the enclosing node.
4830 `n` is the node which locations are going to be fixed relative to parent.
4831 `expr_str` is the child node's string representation, including braces.
4832 */
4833 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4834 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4835 {
4836 char *substr = NULL;
4837 char *start;
4838 int lines = LINENO(parent) - 1;
4839 int cols = parent->n_col_offset;
4840 /* Find the full fstring to fix location information in `n`. */
4841 while (parent && parent->n_type != STRING)
4842 parent = parent->n_child;
4843 if (parent && parent->n_str) {
4844 substr = strstr(parent->n_str, expr_str);
4845 if (substr) {
4846 start = substr;
4847 while (start > parent->n_str) {
4848 if (start[0] == '\n')
4849 break;
4850 start--;
4851 }
4852 cols += (int)(substr - start);
4853 /* adjust the start based on the number of newlines encountered
4854 before the f-string expression */
4855 for (char* p = parent->n_str; p < substr; p++) {
4856 if (*p == '\n') {
4857 lines++;
4858 }
4859 }
4860 }
4861 }
4862 fstring_shift_node_locations(n, lines, cols);
4863 }
4864
4865 /* Compile this expression in to an expr_ty. Add parens around the
4866 expression, in order to allow leading spaces in the expression. */
4867 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4868 fstring_compile_expr(const char *expr_start, const char *expr_end,
4869 struct compiling *c, const node *n)
4870
4871 {
4872 node *mod_n;
4873 mod_ty mod;
4874 char *str;
4875 Py_ssize_t len;
4876 const char *s;
4877
4878 assert(expr_end >= expr_start);
4879 assert(*(expr_start-1) == '{');
4880 assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
4881 *expr_end == '=');
4882
4883 /* If the substring is all whitespace, it's an error. We need to catch this
4884 here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4885 because turning the expression '' in to '()' would go from being invalid
4886 to valid. */
4887 for (s = expr_start; s != expr_end; s++) {
4888 char c = *s;
4889 /* The Python parser ignores only the following whitespace
4890 characters (\r already is converted to \n). */
4891 if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4892 break;
4893 }
4894 }
4895 if (s == expr_end) {
4896 ast_error(c, n, "f-string: empty expression not allowed");
4897 return NULL;
4898 }
4899
4900 len = expr_end - expr_start;
4901 /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4902 str = PyMem_Malloc(len + 3);
4903 if (str == NULL) {
4904 PyErr_NoMemory();
4905 return NULL;
4906 }
4907
4908 str[0] = '(';
4909 memcpy(str+1, expr_start, len);
4910 str[len+1] = ')';
4911 str[len+2] = 0;
4912
4913 PyCompilerFlags cf = _PyCompilerFlags_INIT;
4914 cf.cf_flags = PyCF_ONLY_AST;
4915 mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4916 Py_eval_input, 0);
4917 if (!mod_n) {
4918 PyMem_Free(str);
4919 return NULL;
4920 }
4921 /* Reuse str to find the correct column offset. */
4922 str[0] = '{';
4923 str[len+1] = '}';
4924 fstring_fix_node_location(n, mod_n, str);
4925 mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4926 PyMem_Free(str);
4927 PyNode_Free(mod_n);
4928 if (!mod)
4929 return NULL;
4930 return mod->v.Expression.body;
4931 }
4932
/* Scan the literal text that starts at *str and stops before `end`.

   `raw` is non-zero when backslashes are not to be treated as escapes.
   `recurse_lvl` is 0 at the top level; doubled braces and the single-'}'
   error are only handled there.
   On return *str points to where scanning stopped, and *literal (which
   must be NULL on entry) holds the decoded literal if it is non-empty.

   Return -1 on error.

   Return 0 if we reached the end of the literal.

   Return 1 if we haven't reached the end of the literal, but we want
   the caller to process the literal up to this point. Used for
   doubled braces.
*/
static int
fstring_find_literal(const char **str, const char *end, int raw,
                     PyObject **literal, int recurse_lvl,
                     struct compiling *c, const node *n)
{
    /* Get any literal string. It ends when we hit an un-doubled left
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */

    const char *s = *str;
    const char *literal_start = s;
    int result = 0;

    assert(*literal == NULL);
    while (s < end) {
        char ch = *s++;
        if (!raw && ch == '\\' && s < end) {
            /* Look at the character after the backslash. */
            ch = *s++;
            if (ch == 'N') {
                if (s < end && *s++ == '{') {
                    /* Skip the body of the \N{...} escape, including the
                       closing brace, so its braces are not mistaken for
                       the start or end of an expression. */
                    while (s < end && *s++ != '}') {
                    }
                    continue;
                }
                break;
            }
            /* "\{" is an invalid escape sequence: warn about it, then let
               the brace be handled below like any other '{'. */
            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
                return -1;
            }
        }
        if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
                if (s < end && *s == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
                    *str = s + 1;
                    result = 1;
                    goto done;
                }

                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
                    *str = s - 1;
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            s--;
            break;
        }
    }
    *str = s;
    assert(s <= end);
    assert(s == end || *s == '{' || *s == '}');
done:
    /* The literal is everything in [literal_start, s).  For a doubled
       brace, s sits just past the first brace, so exactly one brace ends
       up in the literal. */
    if (literal_start != s) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                    s - literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                   s - literal_start);
        if (!*literal)
            return -1;
    }
    return result;
}
5017
/* Forward declaration because parsing is recursive: fstring_find_expr()
   calls fstring_parse() to parse a format spec. */
static expr_ty
fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
5022
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{'). Returns the FormattedValue node, which
   includes the expression, conversion character, format_spec expression, and
   optionally the text of the expression (if = is used).

   Note that I don't do a perfect job here: I don't make sure that a
   closing brace doesn't match an opening paren, for example. It
   doesn't need to error on all invalid expressions, just correctly
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later.

   *expression is set to the expression.  For an '=' "debug" expression,
   *expr_text is set to the debug text (the original text of the expression,
   including the '=' and any whitespace around it, as a string object).  If
   not a debug expression, *expr_text set to NULL.

   On success *str has been advanced past the closing '}'.  On error any
   object stored in *expr_text has been released. */
static int
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
                  PyObject **expr_text, expr_ty *expression,
                  struct compiling *c, const node *n)
{
    /* Return -1 on error, else 0. */

    const char *expr_start;
    const char *expr_end;
    expr_ty simple_expression;
    expr_ty format_spec = NULL; /* Optional format specifier. */
    int conversion = -1; /* The conversion char.  Use default if not
                            specified, or !r if using = and no format
                            spec. */

    /* 0 if we're not in a string, else the quote char we're trying to
       match (single or double quote). */
    char quote_char = 0;

    /* If we're inside a string, 1=normal, 3=triple-quoted. */
    int string_type = 0;

    /* Keep track of nesting level for braces/parens/brackets in
       expressions. */
    Py_ssize_t nested_depth = 0;
    /* Opening bracket characters seen so far, innermost last. */
    char parenstack[MAXLEVEL];

    *expr_text = NULL;

    /* Can only nest one level deep. */
    if (recurse_lvl >= 2) {
        ast_error(c, n, "f-string: expressions nested too deeply");
        goto error;
    }

    /* The first char must be a left brace, or we wouldn't have gotten
       here. Skip over it. */
    assert(**str == '{');
    *str += 1;

    expr_start = *str;
    for (; *str < end; (*str)++) {
        char ch;

        /* Loop invariants. */
        assert(nested_depth >= 0);
        assert(*str >= expr_start && *str < end);
        if (quote_char)
            assert(string_type == 1 || string_type == 3);
        else
            assert(string_type == 0);

        ch = **str;
        /* Nowhere inside an expression is a backslash allowed. */
        if (ch == '\\') {
            /* Error: can't include a backslash character, inside
               parens or strings or not. */
            ast_error(c, n,
                      "f-string expression part "
                      "cannot include a backslash");
            goto error;
        }
        if (quote_char) {
            /* We're inside a string. See if we're at the end. */
            /* This code needs to implement the same non-error logic
               as tok_get from tokenizer.c, at the letter_quote
               label. To actually share that code would be a
               nightmare. But, it's unlikely to change and is small,
               so duplicate it here. Note we don't need to catch all
               of the errors, since they'll be caught when parsing the
               expression. We just need to match the non-error
               cases. Thus we can ignore \n in single-quoted strings,
               for example. Or non-terminated strings. */
            if (ch == quote_char) {
                /* Does this match the string_type (single or triple
                   quoted)? */
                if (string_type == 3) {
                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                        /* We're at the end of a triple quoted string. */
                        *str += 2;
                        string_type = 0;
                        quote_char = 0;
                        continue;
                    }
                } else {
                    /* We're at the end of a normal string. */
                    quote_char = 0;
                    string_type = 0;
                    continue;
                }
            }
        } else if (ch == '\'' || ch == '"') {
            /* Is this a triple quoted string? */
            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
                string_type = 3;
                *str += 2;
            } else {
                /* Start of a normal string. */
                string_type = 1;
            }
            /* Start looking for the end of the string. */
            quote_char = ch;
        } else if (ch == '[' || ch == '{' || ch == '(') {
            if (nested_depth >= MAXLEVEL) {
                ast_error(c, n, "f-string: too many nested parenthesis");
                goto error;
            }
            /* Remember the opener so the matching closer can be checked. */
            parenstack[nested_depth] = ch;
            nested_depth++;
        } else if (ch == '#') {
            /* Error: can't include a comment character, inside parens
               or not. */
            ast_error(c, n, "f-string expression part cannot include '#'");
            goto error;
        } else if (nested_depth == 0 &&
                   (ch == '!' || ch == ':' || ch == '}' ||
                    ch == '=' || ch == '>' || ch == '<')) {
            /* See if there's a next character. */
            if (*str+1 < end) {
                char next = *(*str+1);

                /* For "!=". since '=' is not an allowed conversion character,
                   nothing is lost in this test. */
                if ((ch == '!' && next == '=') ||   /* != */
                    (ch == '=' && next == '=') ||   /* == */
                    (ch == '<' && next == '=') ||   /* <= */
                    (ch == '>' && next == '=')      /* >= */
                    ) {
                    /* Skip both characters of the operator. */
                    *str += 1;
                    continue;
                }
                /* Don't get out of the loop for these, if they're single
                   chars (not part of 2-char tokens). If by themselves, they
                   don't end an expression (unlike say '!'). */
                if (ch == '>' || ch == '<') {
                    continue;
                }
            }

            /* Normal way out of this loop. */
            break;
        } else if (ch == ']' || ch == '}' || ch == ')') {
            if (!nested_depth) {
                ast_error(c, n, "f-string: unmatched '%c'", ch);
                goto error;
            }
            nested_depth--;
            int opening = parenstack[nested_depth];
            if (!((opening == '(' && ch == ')') ||
                  (opening == '[' && ch == ']') ||
                  (opening == '{' && ch == '}')))
            {
                ast_error(c, n,
                          "f-string: closing parenthesis '%c' "
                          "does not match opening parenthesis '%c'",
                          ch, opening);
                goto error;
            }
        } else {
            /* Just consume this char and loop around. */
        }
    }
    expr_end = *str;
    /* If we leave this loop in a string or with mismatched parens, we
       don't care. We'll get a syntax error when compiling the
       expression. But, we can produce a better error message, so
       let's just do that.*/
    if (quote_char) {
        ast_error(c, n, "f-string: unterminated string");
        goto error;
    }
    if (nested_depth) {
        int opening = parenstack[nested_depth - 1];
        ast_error(c, n, "f-string: unmatched '%c'", opening);
        goto error;
    }

    if (*str >= end)
        goto unexpected_end_of_string;

    /* Compile the expression as soon as possible, so we show errors
       related to the expression before errors related to the
       conversion or format_spec. */
    simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
    if (!simple_expression)
        goto error;

    /* Check for =, which puts the text value of the expression in
       expr_text. */
    if (**str == '=') {
        if (c->c_feature_version < 8) {
            ast_error(c, n,
                      "f-string: self documenting expressions are "
                      "only supported in Python 3.8 and greater");
            goto error;
        }
        *str += 1;

        /* Skip over ASCII whitespace.  No need to test for end of string
           here, since we know there's at least a trailing quote somewhere
           ahead. */
        while (Py_ISSPACE(**str)) {
            *str += 1;
        }

        /* Set *expr_text to the text of the expression. */
        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
        if (!*expr_text) {
            goto error;
        }
    }

    /* Check for a conversion char, if present. */
    if (**str == '!') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        conversion = **str;
        *str += 1;

        /* Validate the conversion. */
        if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
            ast_error(c, n,
                      "f-string: invalid conversion character: "
                      "expected 's', 'r', or 'a'");
            goto error;
        }

    }

    /* Check for the format spec, if present. */
    if (*str >= end)
        goto unexpected_end_of_string;
    if (**str == ':') {
        *str += 1;
        if (*str >= end)
            goto unexpected_end_of_string;

        /* Parse the format spec. */
        format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
        if (!format_spec)
            goto error;
    }

    if (*str >= end || **str != '}')
        goto unexpected_end_of_string;

    /* We're at a right brace. Consume it. */
    assert(*str < end);
    assert(**str == '}');
    *str += 1;

    /* If we're in = mode (detected by non-NULL expr_text), and have no format
       spec and no explicit conversion, set the conversion to 'r'. */
    if (*expr_text && format_spec == NULL && conversion == -1) {
        conversion = 'r';
    }

    /* And now create the FormattedValue node that represents this
       entire expression with the conversion and format spec. */
    *expression = FormattedValue(simple_expression, conversion,
                                 format_spec, LINENO(n),
                                 n->n_col_offset, n->n_end_lineno,
                                 n->n_end_col_offset, c->c_arena);
    if (!*expression)
        goto error;

    return 0;

unexpected_end_of_string:
    ast_error(c, n, "f-string: expecting '}'");
    /* Falls through to error. */

error:
    /* Release the debug text, if one was created before the failure. */
    Py_XDECREF(*expr_text);
    return -1;

}
5317
5318 /* Return -1 on error.
5319
5320 Return 0 if we have a literal (possible zero length) and an
5321 expression (zero length if at the end of the string.
5322
5323 Return 1 if we have a literal, but no expression, and we want the
5324 caller to call us again. This is used to deal with doubled
5325 braces.
5326
5327 When called multiple times on the string 'a{{b{0}c', this function
5328 will return:
5329
5330 1. the literal 'a{' with no expression, and a return value
5331 of 1. Despite the fact that there's no expression, the return
5332 value of 1 means we're not finished yet.
5333
5334 2. the literal 'b' and the expression '0', with a return value of
5335 0. The fact that there's an expression means we're not finished.
5336
5337 3. literal 'c' with no expression and a return value of 0. The
5338 combination of the return value of 0 with no expression means
5339 we're finished.
5340 */
5341 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,PyObject ** expr_text,expr_ty * expression,struct compiling * c,const node * n)5342 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
5343 int recurse_lvl, PyObject **literal,
5344 PyObject **expr_text, expr_ty *expression,
5345 struct compiling *c, const node *n)
5346 {
5347 int result;
5348
5349 assert(*literal == NULL && *expression == NULL);
5350
5351 /* Get any literal string. */
5352 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
5353 if (result < 0)
5354 goto error;
5355
5356 assert(result == 0 || result == 1);
5357
5358 if (result == 1)
5359 /* We have a literal, but don't look at the expression. */
5360 return 1;
5361
5362 if (*str >= end || **str == '}')
5363 /* We're at the end of the string or the end of a nested
5364 f-string: no expression. The top-level error case where we
5365 expect to be at the end of the string but we're at a '}' is
5366 handled later. */
5367 return 0;
5368
5369 /* We must now be the start of an expression, on a '{'. */
5370 assert(**str == '{');
5371
5372 if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
5373 expression, c, n) < 0)
5374 goto error;
5375
5376 return 0;
5377
5378 error:
5379 Py_CLEAR(*literal);
5380 return -1;
5381 }
5382
/* Number of expr_ty slots held inline in an ExprList (its `data` array)
   before it switches to dynamically allocated storage. */
#define EXPRLIST_N_CACHED  64

typedef struct {
    /* Incrementally build an array of expr_ty, to be used in an
       asdl_seq. Cache some small but reasonably sized number of
       expr_ty's, and then after that start dynamically allocating,
       doubling the number allocated each time. Note that the f-string
       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
       Constant for the literal 'a'. So you add expr_ty's about twice as
       fast as you add expressions in an f-string. */

    Py_ssize_t allocated;  /* Number we've allocated. */
    Py_ssize_t size;       /* Number we've used. */
    expr_ty    *p;         /* Pointer to the memory we're actually
                              using. Will point to 'data' until we
                              start dynamically allocating. */
    expr_ty    data[EXPRLIST_N_CACHED];  /* Inline storage used first. */
} ExprList;
5401
5402 #ifdef NDEBUG
5403 #define ExprList_check_invariants(l)
5404 #else
5405 static void
ExprList_check_invariants(ExprList * l)5406 ExprList_check_invariants(ExprList *l)
5407 {
5408 /* Check our invariants. Make sure this object is "live", and
5409 hasn't been deallocated. */
5410 assert(l->size >= 0);
5411 assert(l->p != NULL);
5412 if (l->size <= EXPRLIST_N_CACHED)
5413 assert(l->data == l->p);
5414 }
5415 #endif
5416
5417 static void
ExprList_Init(ExprList * l)5418 ExprList_Init(ExprList *l)
5419 {
5420 l->allocated = EXPRLIST_N_CACHED;
5421 l->size = 0;
5422
5423 /* Until we start allocating dynamically, p points to data. */
5424 l->p = l->data;
5425
5426 ExprList_check_invariants(l);
5427 }
5428
5429 static int
ExprList_Append(ExprList * l,expr_ty exp)5430 ExprList_Append(ExprList *l, expr_ty exp)
5431 {
5432 ExprList_check_invariants(l);
5433 if (l->size >= l->allocated) {
5434 /* We need to alloc (or realloc) the memory. */
5435 Py_ssize_t new_size = l->allocated * 2;
5436
5437 /* See if we've ever allocated anything dynamically. */
5438 if (l->p == l->data) {
5439 Py_ssize_t i;
5440 /* We're still using the cached data. Switch to
5441 alloc-ing. */
5442 l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
5443 if (!l->p)
5444 return -1;
5445 /* Copy the cached data into the new buffer. */
5446 for (i = 0; i < l->size; i++)
5447 l->p[i] = l->data[i];
5448 } else {
5449 /* Just realloc. */
5450 expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
5451 if (!tmp) {
5452 PyMem_Free(l->p);
5453 l->p = NULL;
5454 return -1;
5455 }
5456 l->p = tmp;
5457 }
5458
5459 l->allocated = new_size;
5460 assert(l->allocated == 2 * l->size);
5461 }
5462
5463 l->p[l->size++] = exp;
5464
5465 ExprList_check_invariants(l);
5466 return 0;
5467 }
5468
5469 static void
ExprList_Dealloc(ExprList * l)5470 ExprList_Dealloc(ExprList *l)
5471 {
5472 ExprList_check_invariants(l);
5473
5474 /* If there's been an error, or we've never dynamically allocated,
5475 do nothing. */
5476 if (!l->p || l->p == l->data) {
5477 /* Do nothing. */
5478 } else {
5479 /* We have dynamically allocated. Free the memory. */
5480 PyMem_Free(l->p);
5481 }
5482 l->p = NULL;
5483 l->size = -1;
5484 }
5485
5486 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)5487 ExprList_Finish(ExprList *l, PyArena *arena)
5488 {
5489 asdl_seq *seq;
5490
5491 ExprList_check_invariants(l);
5492
5493 /* Allocate the asdl_seq and copy the expressions in to it. */
5494 seq = _Py_asdl_seq_new(l->size, arena);
5495 if (seq) {
5496 Py_ssize_t i;
5497 for (i = 0; i < l->size; i++)
5498 asdl_seq_SET(seq, i, l->p[i]);
5499 }
5500 ExprList_Dealloc(l);
5501 return seq;
5502 }
5503
5504 /* The FstringParser is designed to add a mix of strings and
5505 f-strings, and concat them together as needed. Ultimately, it
5506 generates an expr_ty. */
5507 typedef struct {
5508 PyObject *last_str;
5509 ExprList expr_list;
5510 int fmode;
5511 } FstringParser;
5512
5513 #ifdef NDEBUG
5514 #define FstringParser_check_invariants(state)
5515 #else
5516 static void
FstringParser_check_invariants(FstringParser * state)5517 FstringParser_check_invariants(FstringParser *state)
5518 {
5519 if (state->last_str)
5520 assert(PyUnicode_CheckExact(state->last_str));
5521 ExprList_check_invariants(&state->expr_list);
5522 }
5523 #endif
5524
5525 static void
FstringParser_Init(FstringParser * state)5526 FstringParser_Init(FstringParser *state)
5527 {
5528 state->last_str = NULL;
5529 state->fmode = 0;
5530 ExprList_Init(&state->expr_list);
5531 FstringParser_check_invariants(state);
5532 }
5533
5534 static void
FstringParser_Dealloc(FstringParser * state)5535 FstringParser_Dealloc(FstringParser *state)
5536 {
5537 FstringParser_check_invariants(state);
5538
5539 Py_XDECREF(state->last_str);
5540 ExprList_Dealloc(&state->expr_list);
5541 }
5542
5543 /* Constants for the following */
5544 static PyObject *u_kind;
5545
5546 /* Compute 'kind' field for string Constant (either 'u' or None) */
5547 static PyObject *
make_kind(struct compiling * c,const node * n)5548 make_kind(struct compiling *c, const node *n)
5549 {
5550 char *s = NULL;
5551 PyObject *kind = NULL;
5552
5553 /* Find the first string literal, if any */
5554 while (TYPE(n) != STRING) {
5555 if (NCH(n) == 0)
5556 return NULL;
5557 n = CHILD(n, 0);
5558 }
5559 REQ(n, STRING);
5560
5561 /* If it starts with 'u', return a PyUnicode "u" string */
5562 s = STR(n);
5563 if (s && *s == 'u') {
5564 if (!u_kind) {
5565 u_kind = PyUnicode_InternFromString("u");
5566 if (!u_kind)
5567 return NULL;
5568 }
5569 kind = u_kind;
5570 if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
5571 return NULL;
5572 }
5573 Py_INCREF(kind);
5574 }
5575 return kind;
5576 }
5577
5578 /* Make a Constant node, but decref the PyUnicode object being added. */
5579 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)5580 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
5581 {
5582 PyObject *s = *str;
5583 PyObject *kind = NULL;
5584 *str = NULL;
5585 assert(PyUnicode_CheckExact(s));
5586 if (PyArena_AddPyObject(c->c_arena, s) < 0) {
5587 Py_DECREF(s);
5588 return NULL;
5589 }
5590 kind = make_kind(c, n);
5591 if (kind == NULL && PyErr_Occurred())
5592 return NULL;
5593 return Constant(s, kind, LINENO(n), n->n_col_offset,
5594 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5595 }
5596
5597 /* Add a non-f-string (that is, a regular literal string). str is
5598 decref'd. */
5599 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)5600 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
5601 {
5602 FstringParser_check_invariants(state);
5603
5604 assert(PyUnicode_CheckExact(str));
5605
5606 if (PyUnicode_GET_LENGTH(str) == 0) {
5607 Py_DECREF(str);
5608 return 0;
5609 }
5610
5611 if (!state->last_str) {
5612 /* We didn't have a string before, so just remember this one. */
5613 state->last_str = str;
5614 } else {
5615 /* Concatenate this with the previous string. */
5616 PyUnicode_AppendAndDel(&state->last_str, str);
5617 if (!state->last_str)
5618 return -1;
5619 }
5620 FstringParser_check_invariants(state);
5621 return 0;
5622 }
5623
5624 /* Parse an f-string. The f-string is in *str to end, with no
5625 'f' or quotes. */
5626 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5627 FstringParser_ConcatFstring(FstringParser *state, const char **str,
5628 const char *end, int raw, int recurse_lvl,
5629 struct compiling *c, const node *n)
5630 {
5631 FstringParser_check_invariants(state);
5632 state->fmode = 1;
5633
5634 /* Parse the f-string. */
5635 while (1) {
5636 PyObject *literal = NULL;
5637 PyObject *expr_text = NULL;
5638 expr_ty expression = NULL;
5639
5640 /* If there's a zero length literal in front of the
5641 expression, literal will be NULL. If we're at the end of
5642 the f-string, expression will be NULL (unless result == 1,
5643 see below). */
5644 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
5645 &literal, &expr_text,
5646 &expression, c, n);
5647 if (result < 0)
5648 return -1;
5649
5650 /* Add the literal, if any. */
5651 if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
5652 Py_XDECREF(expr_text);
5653 return -1;
5654 }
5655 /* Add the expr_text, if any. */
5656 if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
5657 return -1;
5658 }
5659
5660 /* We've dealt with the literal and expr_text, their ownership has
5661 been transferred to the state object. Don't look at them again. */
5662
5663 /* See if we should just loop around to get the next literal
5664 and expression, while ignoring the expression this
5665 time. This is used for un-doubling braces, as an
5666 optimization. */
5667 if (result == 1)
5668 continue;
5669
5670 if (!expression)
5671 /* We're done with this f-string. */
5672 break;
5673
5674 /* We know we have an expression. Convert any existing string
5675 to a Constant node. */
5676 if (!state->last_str) {
5677 /* Do nothing. No previous literal. */
5678 } else {
5679 /* Convert the existing last_str literal to a Constant node. */
5680 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5681 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5682 return -1;
5683 }
5684
5685 if (ExprList_Append(&state->expr_list, expression) < 0)
5686 return -1;
5687 }
5688
5689 /* If recurse_lvl is zero, then we must be at the end of the
5690 string. Otherwise, we must be at a right brace. */
5691
5692 if (recurse_lvl == 0 && *str < end-1) {
5693 ast_error(c, n, "f-string: unexpected end of string");
5694 return -1;
5695 }
5696 if (recurse_lvl != 0 && **str != '}') {
5697 ast_error(c, n, "f-string: expecting '}'");
5698 return -1;
5699 }
5700
5701 FstringParser_check_invariants(state);
5702 return 0;
5703 }
5704
5705 /* Convert the partial state reflected in last_str and expr_list to an
5706 expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
5707 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5708 FstringParser_Finish(FstringParser *state, struct compiling *c,
5709 const node *n)
5710 {
5711 asdl_seq *seq;
5712
5713 FstringParser_check_invariants(state);
5714
5715 /* If we're just a constant string with no expressions, return
5716 that. */
5717 if (!state->fmode) {
5718 assert(!state->expr_list.size);
5719 if (!state->last_str) {
5720 /* Create a zero length string. */
5721 state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5722 if (!state->last_str)
5723 goto error;
5724 }
5725 return make_str_node_and_del(&state->last_str, c, n);
5726 }
5727
5728 /* Create a Constant node out of last_str, if needed. It will be the
5729 last node in our expression list. */
5730 if (state->last_str) {
5731 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5732 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5733 goto error;
5734 }
5735 /* This has already been freed. */
5736 assert(state->last_str == NULL);
5737
5738 seq = ExprList_Finish(&state->expr_list, c->c_arena);
5739 if (!seq)
5740 goto error;
5741
5742 return JoinedStr(seq, LINENO(n), n->n_col_offset,
5743 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5744
5745 error:
5746 FstringParser_Dealloc(state);
5747 return NULL;
5748 }
5749
5750 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5751 at end, parse it into an expr_ty. Return NULL on error. Adjust
5752 str to point past the parsed portion. */
5753 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5754 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5755 struct compiling *c, const node *n)
5756 {
5757 FstringParser state;
5758
5759 FstringParser_Init(&state);
5760 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5761 c, n) < 0) {
5762 FstringParser_Dealloc(&state);
5763 return NULL;
5764 }
5765
5766 return FstringParser_Finish(&state, c, n);
5767 }
5768
5769 /* n is a Python string literal, including the bracketing quote
5770 characters, and r, b, u, &/or f prefixes (if any), and embedded
5771 escape sequences (if any). parsestr parses it, and sets *result to
5772 decoded Python string object. If the string is an f-string, set
5773 *fstr and *fstrlen to the unparsed string object. Return 0 if no
5774 errors occurred.
5775 */
5776 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5777 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5778 PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5779 {
5780 size_t len;
5781 const char *s = STR(n);
5782 int quote = Py_CHARMASK(*s);
5783 int fmode = 0;
5784 *bytesmode = 0;
5785 *rawmode = 0;
5786 *result = NULL;
5787 *fstr = NULL;
5788 if (Py_ISALPHA(quote)) {
5789 while (!*bytesmode || !*rawmode) {
5790 if (quote == 'b' || quote == 'B') {
5791 quote = *++s;
5792 *bytesmode = 1;
5793 }
5794 else if (quote == 'u' || quote == 'U') {
5795 quote = *++s;
5796 }
5797 else if (quote == 'r' || quote == 'R') {
5798 quote = *++s;
5799 *rawmode = 1;
5800 }
5801 else if (quote == 'f' || quote == 'F') {
5802 quote = *++s;
5803 fmode = 1;
5804 }
5805 else {
5806 break;
5807 }
5808 }
5809 }
5810
5811 /* fstrings are only allowed in Python 3.6 and greater */
5812 if (fmode && c->c_feature_version < 6) {
5813 ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
5814 return -1;
5815 }
5816
5817 if (fmode && *bytesmode) {
5818 PyErr_BadInternalCall();
5819 return -1;
5820 }
5821 if (quote != '\'' && quote != '\"') {
5822 PyErr_BadInternalCall();
5823 return -1;
5824 }
5825 /* Skip the leading quote char. */
5826 s++;
5827 len = strlen(s);
5828 if (len > INT_MAX) {
5829 PyErr_SetString(PyExc_OverflowError,
5830 "string to parse is too long");
5831 return -1;
5832 }
5833 if (s[--len] != quote) {
5834 /* Last quote char must match the first. */
5835 PyErr_BadInternalCall();
5836 return -1;
5837 }
5838 if (len >= 4 && s[0] == quote && s[1] == quote) {
5839 /* A triple quoted string. We've already skipped one quote at
5840 the start and one at the end of the string. Now skip the
5841 two at the start. */
5842 s += 2;
5843 len -= 2;
5844 /* And check that the last two match. */
5845 if (s[--len] != quote || s[--len] != quote) {
5846 PyErr_BadInternalCall();
5847 return -1;
5848 }
5849 }
5850
5851 if (fmode) {
5852 /* Just return the bytes. The caller will parse the resulting
5853 string. */
5854 *fstr = s;
5855 *fstrlen = len;
5856 return 0;
5857 }
5858
5859 /* Not an f-string. */
5860 /* Avoid invoking escape decoding routines if possible. */
5861 *rawmode = *rawmode || strchr(s, '\\') == NULL;
5862 if (*bytesmode) {
5863 /* Disallow non-ASCII characters. */
5864 const char *ch;
5865 for (ch = s; *ch; ch++) {
5866 if (Py_CHARMASK(*ch) >= 0x80) {
5867 ast_error(c, n,
5868 "bytes can only contain ASCII "
5869 "literal characters.");
5870 return -1;
5871 }
5872 }
5873 if (*rawmode)
5874 *result = PyBytes_FromStringAndSize(s, len);
5875 else
5876 *result = decode_bytes_with_escapes(c, n, s, len);
5877 } else {
5878 if (*rawmode)
5879 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5880 else
5881 *result = decode_unicode_with_escapes(c, n, s, len);
5882 }
5883 return *result == NULL ? -1 : 0;
5884 }
5885
5886 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5887 each STRING atom, and process it as needed. For bytes, just
5888 concatenate them together, and the result will be a Constant node. For
5889 normal strings and f-strings, concatenate them together. The result
5890 will be a Constant node if there were no f-strings; a FormattedValue
5891 node if there's just an f-string (with no leading or trailing
5892 literals), or a JoinedStr node if there are multiple f-strings or
5893 any literals involved. */
5894 static expr_ty
parsestrplus(struct compiling * c,const node * n)5895 parsestrplus(struct compiling *c, const node *n)
5896 {
5897 int bytesmode = 0;
5898 PyObject *bytes_str = NULL;
5899 int i;
5900
5901 FstringParser state;
5902 FstringParser_Init(&state);
5903
5904 for (i = 0; i < NCH(n); i++) {
5905 int this_bytesmode;
5906 int this_rawmode;
5907 PyObject *s;
5908 const char *fstr;
5909 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5910
5911 REQ(CHILD(n, i), STRING);
5912 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5913 &fstr, &fstrlen) != 0)
5914 goto error;
5915
5916 /* Check that we're not mixing bytes with unicode. */
5917 if (i != 0 && bytesmode != this_bytesmode) {
5918 ast_error(c, n, "cannot mix bytes and nonbytes literals");
5919 /* s is NULL if the current string part is an f-string. */
5920 Py_XDECREF(s);
5921 goto error;
5922 }
5923 bytesmode = this_bytesmode;
5924
5925 if (fstr != NULL) {
5926 int result;
5927 assert(s == NULL && !bytesmode);
5928 /* This is an f-string. Parse and concatenate it. */
5929 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5930 this_rawmode, 0, c, n);
5931 if (result < 0)
5932 goto error;
5933 } else {
5934 /* A string or byte string. */
5935 assert(s != NULL && fstr == NULL);
5936
5937 assert(bytesmode ? PyBytes_CheckExact(s) :
5938 PyUnicode_CheckExact(s));
5939
5940 if (bytesmode) {
5941 /* For bytes, concat as we go. */
5942 if (i == 0) {
5943 /* First time, just remember this value. */
5944 bytes_str = s;
5945 } else {
5946 PyBytes_ConcatAndDel(&bytes_str, s);
5947 if (!bytes_str)
5948 goto error;
5949 }
5950 } else {
5951 /* This is a regular string. Concatenate it. */
5952 if (FstringParser_ConcatAndDel(&state, s) < 0)
5953 goto error;
5954 }
5955 }
5956 }
5957 if (bytesmode) {
5958 /* Just return the bytes object and we're done. */
5959 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5960 goto error;
5961 return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
5962 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
5963 }
5964
5965 /* We're not a bytes string, bytes_str should never have been set. */
5966 assert(bytes_str == NULL);
5967
5968 return FstringParser_Finish(&state, c, n);
5969
5970 error:
5971 Py_XDECREF(bytes_str);
5972 FstringParser_Dealloc(&state);
5973 return NULL;
5974 }
5975
5976 PyObject *
_PyAST_GetDocString(asdl_seq * body)5977 _PyAST_GetDocString(asdl_seq *body)
5978 {
5979 if (!asdl_seq_LEN(body)) {
5980 return NULL;
5981 }
5982 stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
5983 if (st->kind != Expr_kind) {
5984 return NULL;
5985 }
5986 expr_ty e = st->v.Expr.value;
5987 if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
5988 return e->v.Constant.value;
5989 }
5990 return NULL;
5991 }
5992