1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15
16 #include <assert.h>
17
/* Data structure used internally: the state shared by the conversion
   helpers in this file, which receive it as their first argument. */
struct compiling {
    char *c_encoding; /* source encoding */
    int c_future_unicode; /* __future__ unicode literals flag */
    PyArena *c_arena; /* arena for allocating memory */
    const char *c_filename; /* filename */
};
25
26 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
27 static expr_ty ast_for_expr(struct compiling *, const node *);
28 static stmt_ty ast_for_stmt(struct compiling *, const node *);
29 static asdl_seq *ast_for_suite(struct compiling *, const node *);
30 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
31 expr_context_ty);
32 static expr_ty ast_for_testlist(struct compiling *, const node *);
33 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
34 static expr_ty ast_for_testlist_comp(struct compiling *, const node *);
35
36 /* Note different signature for ast_for_call */
37 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
38
39 static PyObject *parsenumber(struct compiling *, const char *);
40 static PyObject *parsestr(struct compiling *, const node *n, const char *);
41 static PyObject *parsestrplus(struct compiling *, const node *n);
42
/* Line number recorded on a CST node; only defined here if no earlier
   header already supplied a LINENO macro. */
#ifndef LINENO
#define LINENO(n) ((n)->n_lineno)
#endif

/* Selector values for the comprehension flavour (generator expression vs.
   set comprehension). */
#define COMP_GENEXP 0
#define COMP_SETCOMP 1
49
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52 PyObject* id = PyString_InternFromString(n);
53 if (id != NULL)
54 PyArena_AddPyObject(arena, id);
55 return id;
56 }
57
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59
60 /* This routine provides an invalid object for the syntax error.
61 The outermost routine must unpack this error and create the
62 proper object. We do this so that we don't have to pass
63 the filename to everything function.
64
65 XXX Maybe we should just pass the filename...
66 */
67
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72 if (!u)
73 return 0;
74 PyErr_SetObject(PyExc_SyntaxError, u);
75 Py_DECREF(u);
76 return 0;
77 }
78
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83 long lineno;
84
85 assert(PyErr_Occurred());
86 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87 return;
88
89 PyErr_Fetch(&type, &value, &tback);
90 errstr = PyTuple_GetItem(value, 0);
91 if (!errstr)
92 return;
93 Py_INCREF(errstr);
94 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95 if (lineno == -1) {
96 Py_DECREF(errstr);
97 return;
98 }
99 Py_DECREF(value);
100
101 loc = PyErr_ProgramText(filename, lineno);
102 if (!loc) {
103 Py_INCREF(Py_None);
104 loc = Py_None;
105 }
106 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107 Py_DECREF(loc);
108 if (!tmp) {
109 Py_DECREF(errstr);
110 return;
111 }
112 value = PyTuple_Pack(2, errstr, tmp);
113 Py_DECREF(errstr);
114 Py_DECREF(tmp);
115 if (!value)
116 return;
117 PyErr_Restore(type, value, tback);
118 }
119
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123 if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124 NULL, NULL) < 0) {
125 /* if -Werr, change it to a SyntaxError */
126 if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127 ast_error(n, msg);
128 return 0;
129 }
130 return 1;
131 }
132
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136 if (!strcmp(x, "None"))
137 return ast_error(n, "cannot assign to None");
138 if (!strcmp(x, "__debug__"))
139 return ast_error(n, "cannot assign to __debug__");
140 if (Py_Py3kWarningFlag) {
141 if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142 !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143 return 0;
144 if (!strcmp(x, "nonlocal") &&
145 !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146 return 0;
147 }
148 return 1;
149 }
150
151 /* num_stmts() returns number of contained statements.
152
153 Use this routine to determine how big a sequence is needed for
154 the statements in a parse tree. Its raison d'etre is this bit of
155 grammar:
156
157 stmt: simple_stmt | compound_stmt
158 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159
160 A simple_stmt can contain multiple small_stmt elements joined
161 by semicolons. If the arg is a simple_stmt, the number of
162 small_stmt elements is returned.
163 */
164
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168 int i, l;
169 node *ch;
170
171 switch (TYPE(n)) {
172 case single_input:
173 if (TYPE(CHILD(n, 0)) == NEWLINE)
174 return 0;
175 else
176 return num_stmts(CHILD(n, 0));
177 case file_input:
178 l = 0;
179 for (i = 0; i < NCH(n); i++) {
180 ch = CHILD(n, i);
181 if (TYPE(ch) == stmt)
182 l += num_stmts(ch);
183 }
184 return l;
185 case stmt:
186 return num_stmts(CHILD(n, 0));
187 case compound_stmt:
188 return 1;
189 case simple_stmt:
190 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191 case suite:
192 if (NCH(n) == 1)
193 return num_stmts(CHILD(n, 0));
194 else {
195 l = 0;
196 for (i = 2; i < (NCH(n) - 1); i++)
197 l += num_stmts(CHILD(n, i));
198 return l;
199 }
200 default: {
201 char buf[128];
202
203 sprintf(buf, "Non-statement found: %d %d",
204 TYPE(n), NCH(n));
205 Py_FatalError(buf);
206 }
207 }
208 assert(0);
209 return 0;
210 }
211
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217 PyArena *arena)
218 {
219 int i, j, k, num;
220 asdl_seq *stmts = NULL;
221 stmt_ty s;
222 node *ch;
223 struct compiling c;
224
225 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226 c.c_encoding = "utf-8";
227 if (TYPE(n) == encoding_decl) {
228 ast_error(n, "encoding declaration in Unicode string");
229 goto error;
230 }
231 } else if (TYPE(n) == encoding_decl) {
232 c.c_encoding = STR(n);
233 n = CHILD(n, 0);
234 } else {
235 c.c_encoding = NULL;
236 }
237 c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238 c.c_arena = arena;
239 c.c_filename = filename;
240
241 k = 0;
242 switch (TYPE(n)) {
243 case file_input:
244 stmts = asdl_seq_new(num_stmts(n), arena);
245 if (!stmts)
246 return NULL;
247 for (i = 0; i < NCH(n) - 1; i++) {
248 ch = CHILD(n, i);
249 if (TYPE(ch) == NEWLINE)
250 continue;
251 REQ(ch, stmt);
252 num = num_stmts(ch);
253 if (num == 1) {
254 s = ast_for_stmt(&c, ch);
255 if (!s)
256 goto error;
257 asdl_seq_SET(stmts, k++, s);
258 }
259 else {
260 ch = CHILD(ch, 0);
261 REQ(ch, simple_stmt);
262 for (j = 0; j < num; j++) {
263 s = ast_for_stmt(&c, CHILD(ch, j * 2));
264 if (!s)
265 goto error;
266 asdl_seq_SET(stmts, k++, s);
267 }
268 }
269 }
270 return Module(stmts, arena);
271 case eval_input: {
272 expr_ty testlist_ast;
273
274 /* XXX Why not comp_for here? */
275 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276 if (!testlist_ast)
277 goto error;
278 return Expression(testlist_ast, arena);
279 }
280 case single_input:
281 if (TYPE(CHILD(n, 0)) == NEWLINE) {
282 stmts = asdl_seq_new(1, arena);
283 if (!stmts)
284 goto error;
285 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286 arena));
287 if (!asdl_seq_GET(stmts, 0))
288 goto error;
289 return Interactive(stmts, arena);
290 }
291 else {
292 n = CHILD(n, 0);
293 num = num_stmts(n);
294 stmts = asdl_seq_new(num, arena);
295 if (!stmts)
296 goto error;
297 if (num == 1) {
298 s = ast_for_stmt(&c, n);
299 if (!s)
300 goto error;
301 asdl_seq_SET(stmts, 0, s);
302 }
303 else {
304 /* Only a simple_stmt can contain multiple statements. */
305 REQ(n, simple_stmt);
306 for (i = 0; i < NCH(n); i += 2) {
307 if (TYPE(CHILD(n, i)) == NEWLINE)
308 break;
309 s = ast_for_stmt(&c, CHILD(n, i));
310 if (!s)
311 goto error;
312 asdl_seq_SET(stmts, i / 2, s);
313 }
314 }
315
316 return Interactive(stmts, arena);
317 }
318 default:
319 PyErr_Format(PyExc_SystemError,
320 "invalid node %d for PyAST_FromNode", TYPE(n));
321 goto error;
322 }
323 error:
324 ast_error_finish(filename);
325 return NULL;
326 }
327
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334 switch (TYPE(n)) {
335 case VBAR:
336 return BitOr;
337 case CIRCUMFLEX:
338 return BitXor;
339 case AMPER:
340 return BitAnd;
341 case LEFTSHIFT:
342 return LShift;
343 case RIGHTSHIFT:
344 return RShift;
345 case PLUS:
346 return Add;
347 case MINUS:
348 return Sub;
349 case STAR:
350 return Mult;
351 case SLASH:
352 return Div;
353 case DOUBLESLASH:
354 return FloorDiv;
355 case PERCENT:
356 return Mod;
357 default:
358 return (operator_ty)0;
359 }
360 }
361
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363
364 Only sets context for expr kinds that "can appear in assignment context"
365 (according to ../Parser/Python.asdl). For other expr kinds, it sets
366 an appropriate syntax error and returns false.
367 */
368
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372 asdl_seq *s = NULL;
373 /* If a particular expression type can't be used for assign / delete,
374 set expr_name to its name and an error message will be generated.
375 */
376 const char* expr_name = NULL;
377
378 /* The ast defines augmented store and load contexts, but the
379 implementation here doesn't actually use them. The code may be
380 a little more complex than necessary as a result. It also means
381 that expressions in an augmented assignment have a Store context.
382 Consider restructuring so that augmented assignment uses
383 set_context(), too.
384 */
385 assert(ctx != AugStore && ctx != AugLoad);
386
387 switch (e->kind) {
388 case Attribute_kind:
389 if (ctx == Store && !forbidden_check(c, n,
390 PyBytes_AS_STRING(e->v.Attribute.attr)))
391 return 0;
392 e->v.Attribute.ctx = ctx;
393 break;
394 case Subscript_kind:
395 e->v.Subscript.ctx = ctx;
396 break;
397 case Name_kind:
398 if (ctx == Store && !forbidden_check(c, n,
399 PyBytes_AS_STRING(e->v.Name.id)))
400 return 0;
401 e->v.Name.ctx = ctx;
402 break;
403 case List_kind:
404 e->v.List.ctx = ctx;
405 s = e->v.List.elts;
406 break;
407 case Tuple_kind:
408 if (asdl_seq_LEN(e->v.Tuple.elts)) {
409 e->v.Tuple.ctx = ctx;
410 s = e->v.Tuple.elts;
411 }
412 else {
413 expr_name = "()";
414 }
415 break;
416 case Lambda_kind:
417 expr_name = "lambda";
418 break;
419 case Call_kind:
420 expr_name = "function call";
421 break;
422 case BoolOp_kind:
423 case BinOp_kind:
424 case UnaryOp_kind:
425 expr_name = "operator";
426 break;
427 case GeneratorExp_kind:
428 expr_name = "generator expression";
429 break;
430 case Yield_kind:
431 expr_name = "yield expression";
432 break;
433 case ListComp_kind:
434 expr_name = "list comprehension";
435 break;
436 case SetComp_kind:
437 expr_name = "set comprehension";
438 break;
439 case DictComp_kind:
440 expr_name = "dict comprehension";
441 break;
442 case Dict_kind:
443 case Set_kind:
444 case Num_kind:
445 case Str_kind:
446 expr_name = "literal";
447 break;
448 case Compare_kind:
449 expr_name = "comparison";
450 break;
451 case Repr_kind:
452 expr_name = "repr";
453 break;
454 case IfExp_kind:
455 expr_name = "conditional expression";
456 break;
457 default:
458 PyErr_Format(PyExc_SystemError,
459 "unexpected expression in assignment %d (line %d)",
460 e->kind, e->lineno);
461 return 0;
462 }
463 /* Check for error string set by switch */
464 if (expr_name) {
465 char buf[300];
466 PyOS_snprintf(buf, sizeof(buf),
467 "can't %s %s",
468 ctx == Store ? "assign to" : "delete",
469 expr_name);
470 return ast_error(n, buf);
471 }
472
473 /* If the LHS is a list or tuple, we need to set the assignment
474 context for all the contained elements.
475 */
476 if (s) {
477 int i;
478
479 for (i = 0; i < asdl_seq_LEN(s); i++) {
480 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481 return 0;
482 }
483 }
484 return 1;
485 }
486
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490 REQ(n, augassign);
491 n = CHILD(n, 0);
492 switch (STR(n)[0]) {
493 case '+':
494 return Add;
495 case '-':
496 return Sub;
497 case '/':
498 if (STR(n)[1] == '/')
499 return FloorDiv;
500 else
501 return Div;
502 case '%':
503 return Mod;
504 case '<':
505 return LShift;
506 case '>':
507 return RShift;
508 case '&':
509 return BitAnd;
510 case '^':
511 return BitXor;
512 case '|':
513 return BitOr;
514 case '*':
515 if (STR(n)[1] == '*')
516 return Pow;
517 else
518 return Mult;
519 default:
520 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521 return (operator_ty)0;
522 }
523 }
524
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529 |'is' 'not'
530 */
531 REQ(n, comp_op);
532 if (NCH(n) == 1) {
533 n = CHILD(n, 0);
534 switch (TYPE(n)) {
535 case LESS:
536 return Lt;
537 case GREATER:
538 return Gt;
539 case EQEQUAL: /* == */
540 return Eq;
541 case LESSEQUAL:
542 return LtE;
543 case GREATEREQUAL:
544 return GtE;
545 case NOTEQUAL:
546 return NotEq;
547 case NAME:
548 if (strcmp(STR(n), "in") == 0)
549 return In;
550 if (strcmp(STR(n), "is") == 0)
551 return Is;
552 default:
553 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554 STR(n));
555 return (cmpop_ty)0;
556 }
557 }
558 else if (NCH(n) == 2) {
559 /* handle "not in" and "is not" */
560 switch (TYPE(CHILD(n, 0))) {
561 case NAME:
562 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563 return NotIn;
564 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565 return IsNot;
566 default:
567 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569 return (cmpop_ty)0;
570 }
571 }
572 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573 NCH(n));
574 return (cmpop_ty)0;
575 }
576
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580 /* testlist: test (',' test)* [','] */
581 asdl_seq *seq;
582 expr_ty expression;
583 int i;
584 assert(TYPE(n) == testlist ||
585 TYPE(n) == listmaker ||
586 TYPE(n) == testlist_comp ||
587 TYPE(n) == testlist_safe ||
588 TYPE(n) == testlist1);
589
590 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591 if (!seq)
592 return NULL;
593
594 for (i = 0; i < NCH(n); i += 2) {
595 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596
597 expression = ast_for_expr(c, CHILD(n, i));
598 if (!expression)
599 return NULL;
600
601 assert(i / 2 < seq->size);
602 asdl_seq_SET(seq, i / 2, expression);
603 }
604 return seq;
605 }
606
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610 int i, len = (NCH(n) + 1) / 2;
611 expr_ty result;
612 asdl_seq *args = asdl_seq_new(len, c->c_arena);
613 if (!args)
614 return NULL;
615
616 /* fpdef: NAME | '(' fplist ')'
617 fplist: fpdef (',' fpdef)* [',']
618 */
619 REQ(n, fplist);
620 for (i = 0; i < len; i++) {
621 PyObject *arg_id;
622 const node *fpdef_node = CHILD(n, 2*i);
623 const node *child;
624 expr_ty arg;
625 set_name:
626 /* fpdef_node is either a NAME or an fplist */
627 child = CHILD(fpdef_node, 0);
628 if (TYPE(child) == NAME) {
629 if (!forbidden_check(c, n, STR(child)))
630 return NULL;
631 arg_id = NEW_IDENTIFIER(child);
632 if (!arg_id)
633 return NULL;
634 arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635 c->c_arena);
636 }
637 else {
638 assert(TYPE(fpdef_node) == fpdef);
639 /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640 child = CHILD(fpdef_node, 1);
641 assert(TYPE(child) == fplist);
642 /* NCH == 1 means we have (x), we need to elide the extra parens */
643 if (NCH(child) == 1) {
644 fpdef_node = CHILD(child, 0);
645 assert(TYPE(fpdef_node) == fpdef);
646 goto set_name;
647 }
648 arg = compiler_complex_args(c, child);
649 }
650 asdl_seq_SET(args, i, arg);
651 }
652
653 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654 if (!set_context(c, result, Store, n))
655 return NULL;
656 return result;
657 }
658
659
660 /* Create AST for argument list. */
661
662 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)663 ast_for_arguments(struct compiling *c, const node *n)
664 {
665 /* parameters: '(' [varargslist] ')'
666 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
667 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
668 */
669 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
670 asdl_seq *args, *defaults;
671 identifier vararg = NULL, kwarg = NULL;
672 node *ch;
673
674 if (TYPE(n) == parameters) {
675 if (NCH(n) == 2) /* () as argument list */
676 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
677 n = CHILD(n, 1);
678 }
679 REQ(n, varargslist);
680
681 /* first count the number of normal args & defaults */
682 for (i = 0; i < NCH(n); i++) {
683 ch = CHILD(n, i);
684 if (TYPE(ch) == fpdef)
685 n_args++;
686 if (TYPE(ch) == EQUAL)
687 n_defaults++;
688 }
689 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
690 if (!args && n_args)
691 return NULL;
692 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
693 if (!defaults && n_defaults)
694 return NULL;
695
696 /* fpdef: NAME | '(' fplist ')'
697 fplist: fpdef (',' fpdef)* [',']
698 */
699 i = 0;
700 j = 0; /* index for defaults */
701 k = 0; /* index for args */
702 while (i < NCH(n)) {
703 ch = CHILD(n, i);
704 switch (TYPE(ch)) {
705 case fpdef: {
706 int complex_args = 0, parenthesized = 0;
707 handle_fpdef:
708 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
709 anything other than EQUAL or a comma? */
710 /* XXX Should NCH(n) check be made a separate check? */
711 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
712 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
713 if (!expression)
714 return NULL;
715 assert(defaults != NULL);
716 asdl_seq_SET(defaults, j++, expression);
717 i += 2;
718 found_default = 1;
719 }
720 else if (found_default) {
721 /* def f((x)=4): pass should raise an error.
722 def f((x, (y))): pass will just incur the tuple unpacking warning. */
723 if (parenthesized && !complex_args) {
724 ast_error(n, "parenthesized arg with default");
725 return NULL;
726 }
727 ast_error(n,
728 "non-default argument follows default argument");
729 return NULL;
730 }
731 if (NCH(ch) == 3) {
732 ch = CHILD(ch, 1);
733 /* def foo((x)): is not complex, special case. */
734 if (NCH(ch) != 1) {
735 /* We have complex arguments, setup for unpacking. */
736 if (Py_Py3kWarningFlag && !ast_warn(c, ch,
737 "tuple parameter unpacking has been removed in 3.x"))
738 return NULL;
739 complex_args = 1;
740 asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
741 if (!asdl_seq_GET(args, k-1))
742 return NULL;
743 } else {
744 /* def foo((x)): setup for checking NAME below. */
745 /* Loop because there can be many parens and tuple
746 unpacking mixed in. */
747 parenthesized = 1;
748 ch = CHILD(ch, 0);
749 assert(TYPE(ch) == fpdef);
750 goto handle_fpdef;
751 }
752 }
753 if (TYPE(CHILD(ch, 0)) == NAME) {
754 PyObject *id;
755 expr_ty name;
756 if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
757 return NULL;
758 id = NEW_IDENTIFIER(CHILD(ch, 0));
759 if (!id)
760 return NULL;
761 name = Name(id, Param, LINENO(ch), ch->n_col_offset,
762 c->c_arena);
763 if (!name)
764 return NULL;
765 asdl_seq_SET(args, k++, name);
766
767 }
768 i += 2; /* the name and the comma */
769 if (parenthesized && Py_Py3kWarningFlag &&
770 !ast_warn(c, ch, "parenthesized argument names "
771 "are invalid in 3.x"))
772 return NULL;
773
774 break;
775 }
776 case STAR:
777 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
778 return NULL;
779 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
780 if (!vararg)
781 return NULL;
782 i += 3;
783 break;
784 case DOUBLESTAR:
785 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
786 return NULL;
787 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
788 if (!kwarg)
789 return NULL;
790 i += 3;
791 break;
792 default:
793 PyErr_Format(PyExc_SystemError,
794 "unexpected node in varargslist: %d @ %d",
795 TYPE(ch), i);
796 return NULL;
797 }
798 }
799
800 return arguments(args, vararg, kwarg, defaults, c->c_arena);
801 }
802
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806 expr_ty e;
807 identifier id;
808 int lineno, col_offset;
809 int i;
810
811 REQ(n, dotted_name);
812
813 lineno = LINENO(n);
814 col_offset = n->n_col_offset;
815
816 id = NEW_IDENTIFIER(CHILD(n, 0));
817 if (!id)
818 return NULL;
819 e = Name(id, Load, lineno, col_offset, c->c_arena);
820 if (!e)
821 return NULL;
822
823 for (i = 2; i < NCH(n); i+=2) {
824 id = NEW_IDENTIFIER(CHILD(n, i));
825 if (!id)
826 return NULL;
827 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828 if (!e)
829 return NULL;
830 }
831
832 return e;
833 }
834
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839 expr_ty d = NULL;
840 expr_ty name_expr;
841
842 REQ(n, decorator);
843 REQ(CHILD(n, 0), AT);
844 REQ(RCHILD(n, -1), NEWLINE);
845
846 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847 if (!name_expr)
848 return NULL;
849
850 if (NCH(n) == 3) { /* No arguments */
851 d = name_expr;
852 name_expr = NULL;
853 }
854 else if (NCH(n) == 5) { /* Call with no arguments */
855 d = Call(name_expr, NULL, NULL, NULL, NULL,
856 name_expr->lineno, name_expr->col_offset,
857 c->c_arena);
858 if (!d)
859 return NULL;
860 name_expr = NULL;
861 }
862 else {
863 d = ast_for_call(c, CHILD(n, 3), name_expr);
864 if (!d)
865 return NULL;
866 name_expr = NULL;
867 }
868
869 return d;
870 }
871
872 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)873 ast_for_decorators(struct compiling *c, const node *n)
874 {
875 asdl_seq* decorator_seq;
876 expr_ty d;
877 int i;
878
879 REQ(n, decorators);
880 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
881 if (!decorator_seq)
882 return NULL;
883
884 for (i = 0; i < NCH(n); i++) {
885 d = ast_for_decorator(c, CHILD(n, i));
886 if (!d)
887 return NULL;
888 asdl_seq_SET(decorator_seq, i, d);
889 }
890 return decorator_seq;
891 }
892
893 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)894 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
895 {
896 /* funcdef: 'def' NAME parameters ':' suite */
897 identifier name;
898 arguments_ty args;
899 asdl_seq *body;
900 int name_i = 1;
901
902 REQ(n, funcdef);
903
904 name = NEW_IDENTIFIER(CHILD(n, name_i));
905 if (!name)
906 return NULL;
907 else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
908 return NULL;
909 args = ast_for_arguments(c, CHILD(n, name_i + 1));
910 if (!args)
911 return NULL;
912 body = ast_for_suite(c, CHILD(n, name_i + 3));
913 if (!body)
914 return NULL;
915
916 return FunctionDef(name, args, body, decorator_seq, LINENO(n),
917 n->n_col_offset, c->c_arena);
918 }
919
920 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)921 ast_for_decorated(struct compiling *c, const node *n)
922 {
923 /* decorated: decorators (classdef | funcdef) */
924 stmt_ty thing = NULL;
925 asdl_seq *decorator_seq = NULL;
926
927 REQ(n, decorated);
928
929 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
930 if (!decorator_seq)
931 return NULL;
932
933 assert(TYPE(CHILD(n, 1)) == funcdef ||
934 TYPE(CHILD(n, 1)) == classdef);
935
936 if (TYPE(CHILD(n, 1)) == funcdef) {
937 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
938 } else if (TYPE(CHILD(n, 1)) == classdef) {
939 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
940 }
941 /* we count the decorators in when talking about the class' or
942 function's line number */
943 if (thing) {
944 thing->lineno = LINENO(n);
945 thing->col_offset = n->n_col_offset;
946 }
947 return thing;
948 }
949
950 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)951 ast_for_lambdef(struct compiling *c, const node *n)
952 {
953 /* lambdef: 'lambda' [varargslist] ':' test */
954 arguments_ty args;
955 expr_ty expression;
956
957 if (NCH(n) == 3) {
958 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
959 if (!args)
960 return NULL;
961 expression = ast_for_expr(c, CHILD(n, 2));
962 if (!expression)
963 return NULL;
964 }
965 else {
966 args = ast_for_arguments(c, CHILD(n, 1));
967 if (!args)
968 return NULL;
969 expression = ast_for_expr(c, CHILD(n, 3));
970 if (!expression)
971 return NULL;
972 }
973
974 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
975 }
976
977 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)978 ast_for_ifexpr(struct compiling *c, const node *n)
979 {
980 /* test: or_test 'if' or_test 'else' test */
981 expr_ty expression, body, orelse;
982
983 assert(NCH(n) == 5);
984 body = ast_for_expr(c, CHILD(n, 0));
985 if (!body)
986 return NULL;
987 expression = ast_for_expr(c, CHILD(n, 2));
988 if (!expression)
989 return NULL;
990 orelse = ast_for_expr(c, CHILD(n, 4));
991 if (!orelse)
992 return NULL;
993 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
994 c->c_arena);
995 }
996
997 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
998 so there is only a single version. Possibly for loops can also re-use
999 the code.
1000 */
1001
1002 /* Count the number of 'for' loop in a list comprehension.
1003
1004 Helper for ast_for_listcomp().
1005 */
1006
1007 static int
count_list_fors(struct compiling * c,const node * n)1008 count_list_fors(struct compiling *c, const node *n)
1009 {
1010 int n_fors = 0;
1011 node *ch = CHILD(n, 1);
1012
1013 count_list_for:
1014 n_fors++;
1015 REQ(ch, list_for);
1016 if (NCH(ch) == 5)
1017 ch = CHILD(ch, 4);
1018 else
1019 return n_fors;
1020 count_list_iter:
1021 REQ(ch, list_iter);
1022 ch = CHILD(ch, 0);
1023 if (TYPE(ch) == list_for)
1024 goto count_list_for;
1025 else if (TYPE(ch) == list_if) {
1026 if (NCH(ch) == 3) {
1027 ch = CHILD(ch, 2);
1028 goto count_list_iter;
1029 }
1030 else
1031 return n_fors;
1032 }
1033
1034 /* Should never be reached */
1035 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1036 return -1;
1037 }
1038
1039 /* Count the number of 'if' statements in a list comprehension.
1040
1041 Helper for ast_for_listcomp().
1042 */
1043
1044 static int
count_list_ifs(struct compiling * c,const node * n)1045 count_list_ifs(struct compiling *c, const node *n)
1046 {
1047 int n_ifs = 0;
1048
1049 count_list_iter:
1050 REQ(n, list_iter);
1051 if (TYPE(CHILD(n, 0)) == list_for)
1052 return n_ifs;
1053 n = CHILD(n, 0);
1054 REQ(n, list_if);
1055 n_ifs++;
1056 if (NCH(n) == 2)
1057 return n_ifs;
1058 n = CHILD(n, 2);
1059 goto count_list_iter;
1060 }
1061
1062 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1063 ast_for_listcomp(struct compiling *c, const node *n)
1064 {
1065 /* listmaker: test ( list_for | (',' test)* [','] )
1066 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1067 list_iter: list_for | list_if
1068 list_if: 'if' test [list_iter]
1069 testlist_safe: test [(',' test)+ [',']]
1070 */
1071 expr_ty elt, first;
1072 asdl_seq *listcomps;
1073 int i, n_fors;
1074 node *ch;
1075
1076 REQ(n, listmaker);
1077 assert(NCH(n) > 1);
1078
1079 elt = ast_for_expr(c, CHILD(n, 0));
1080 if (!elt)
1081 return NULL;
1082
1083 n_fors = count_list_fors(c, n);
1084 if (n_fors == -1)
1085 return NULL;
1086
1087 listcomps = asdl_seq_new(n_fors, c->c_arena);
1088 if (!listcomps)
1089 return NULL;
1090
1091 ch = CHILD(n, 1);
1092 for (i = 0; i < n_fors; i++) {
1093 comprehension_ty lc;
1094 asdl_seq *t;
1095 expr_ty expression;
1096 node *for_ch;
1097
1098 REQ(ch, list_for);
1099
1100 for_ch = CHILD(ch, 1);
1101 t = ast_for_exprlist(c, for_ch, Store);
1102 if (!t)
1103 return NULL;
1104 expression = ast_for_testlist(c, CHILD(ch, 3));
1105 if (!expression)
1106 return NULL;
1107
1108 /* Check the # of children rather than the length of t, since
1109 [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1110 */
1111 first = (expr_ty)asdl_seq_GET(t, 0);
1112 if (NCH(for_ch) == 1)
1113 lc = comprehension(first, expression, NULL, c->c_arena);
1114 else
1115 lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1116 c->c_arena),
1117 expression, NULL, c->c_arena);
1118 if (!lc)
1119 return NULL;
1120
1121 if (NCH(ch) == 5) {
1122 int j, n_ifs;
1123 asdl_seq *ifs;
1124 expr_ty list_for_expr;
1125
1126 ch = CHILD(ch, 4);
1127 n_ifs = count_list_ifs(c, ch);
1128 if (n_ifs == -1)
1129 return NULL;
1130
1131 ifs = asdl_seq_new(n_ifs, c->c_arena);
1132 if (!ifs)
1133 return NULL;
1134
1135 for (j = 0; j < n_ifs; j++) {
1136 REQ(ch, list_iter);
1137 ch = CHILD(ch, 0);
1138 REQ(ch, list_if);
1139
1140 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1141 if (!list_for_expr)
1142 return NULL;
1143
1144 asdl_seq_SET(ifs, j, list_for_expr);
1145 if (NCH(ch) == 3)
1146 ch = CHILD(ch, 2);
1147 }
1148 /* on exit, must guarantee that ch is a list_for */
1149 if (TYPE(ch) == list_iter)
1150 ch = CHILD(ch, 0);
1151 lc->ifs = ifs;
1152 }
1153 asdl_seq_SET(listcomps, i, lc);
1154 }
1155
1156 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1157 }
1158
1159 /*
1160 Count the number of 'for' loops in a comprehension.
1161
1162 Helper for ast_for_comprehension().
1163 */
1164
1165 static int
count_comp_fors(struct compiling * c,const node * n)1166 count_comp_fors(struct compiling *c, const node *n)
1167 {
1168 int n_fors = 0;
1169
1170 count_comp_for:
1171 n_fors++;
1172 REQ(n, comp_for);
1173 if (NCH(n) == 5)
1174 n = CHILD(n, 4);
1175 else
1176 return n_fors;
1177 count_comp_iter:
1178 REQ(n, comp_iter);
1179 n = CHILD(n, 0);
1180 if (TYPE(n) == comp_for)
1181 goto count_comp_for;
1182 else if (TYPE(n) == comp_if) {
1183 if (NCH(n) == 3) {
1184 n = CHILD(n, 2);
1185 goto count_comp_iter;
1186 }
1187 else
1188 return n_fors;
1189 }
1190
1191 /* Should never be reached */
1192 PyErr_SetString(PyExc_SystemError,
1193 "logic error in count_comp_fors");
1194 return -1;
1195 }
1196
1197 /* Count the number of 'if' statements in a comprehension.
1198
1199 Helper for ast_for_comprehension().
1200 */
1201
1202 static int
count_comp_ifs(struct compiling * c,const node * n)1203 count_comp_ifs(struct compiling *c, const node *n)
1204 {
1205 int n_ifs = 0;
1206
1207 while (1) {
1208 REQ(n, comp_iter);
1209 if (TYPE(CHILD(n, 0)) == comp_for)
1210 return n_ifs;
1211 n = CHILD(n, 0);
1212 REQ(n, comp_if);
1213 n_ifs++;
1214 if (NCH(n) == 2)
1215 return n_ifs;
1216 n = CHILD(n, 2);
1217 }
1218 }
1219
1220 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1221 ast_for_comprehension(struct compiling *c, const node *n)
1222 {
1223 int i, n_fors;
1224 asdl_seq *comps;
1225
1226 n_fors = count_comp_fors(c, n);
1227 if (n_fors == -1)
1228 return NULL;
1229
1230 comps = asdl_seq_new(n_fors, c->c_arena);
1231 if (!comps)
1232 return NULL;
1233
1234 for (i = 0; i < n_fors; i++) {
1235 comprehension_ty comp;
1236 asdl_seq *t;
1237 expr_ty expression, first;
1238 node *for_ch;
1239
1240 REQ(n, comp_for);
1241
1242 for_ch = CHILD(n, 1);
1243 t = ast_for_exprlist(c, for_ch, Store);
1244 if (!t)
1245 return NULL;
1246 expression = ast_for_expr(c, CHILD(n, 3));
1247 if (!expression)
1248 return NULL;
1249
1250 /* Check the # of children rather than the length of t, since
1251 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1252 first = (expr_ty)asdl_seq_GET(t, 0);
1253 if (NCH(for_ch) == 1)
1254 comp = comprehension(first, expression, NULL, c->c_arena);
1255 else
1256 comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1257 c->c_arena),
1258 expression, NULL, c->c_arena);
1259 if (!comp)
1260 return NULL;
1261
1262 if (NCH(n) == 5) {
1263 int j, n_ifs;
1264 asdl_seq *ifs;
1265
1266 n = CHILD(n, 4);
1267 n_ifs = count_comp_ifs(c, n);
1268 if (n_ifs == -1)
1269 return NULL;
1270
1271 ifs = asdl_seq_new(n_ifs, c->c_arena);
1272 if (!ifs)
1273 return NULL;
1274
1275 for (j = 0; j < n_ifs; j++) {
1276 REQ(n, comp_iter);
1277 n = CHILD(n, 0);
1278 REQ(n, comp_if);
1279
1280 expression = ast_for_expr(c, CHILD(n, 1));
1281 if (!expression)
1282 return NULL;
1283 asdl_seq_SET(ifs, j, expression);
1284 if (NCH(n) == 3)
1285 n = CHILD(n, 2);
1286 }
1287 /* on exit, must guarantee that n is a comp_for */
1288 if (TYPE(n) == comp_iter)
1289 n = CHILD(n, 0);
1290 comp->ifs = ifs;
1291 }
1292 asdl_seq_SET(comps, i, comp);
1293 }
1294 return comps;
1295 }
1296
1297 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1298 ast_for_itercomp(struct compiling *c, const node *n, int type)
1299 {
1300 expr_ty elt;
1301 asdl_seq *comps;
1302
1303 assert(NCH(n) > 1);
1304
1305 elt = ast_for_expr(c, CHILD(n, 0));
1306 if (!elt)
1307 return NULL;
1308
1309 comps = ast_for_comprehension(c, CHILD(n, 1));
1310 if (!comps)
1311 return NULL;
1312
1313 if (type == COMP_GENEXP)
1314 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1315 else if (type == COMP_SETCOMP)
1316 return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1317 else
1318 /* Should never happen */
1319 return NULL;
1320 }
1321
1322 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1323 ast_for_dictcomp(struct compiling *c, const node *n)
1324 {
1325 expr_ty key, value;
1326 asdl_seq *comps;
1327
1328 assert(NCH(n) > 3);
1329 REQ(CHILD(n, 1), COLON);
1330
1331 key = ast_for_expr(c, CHILD(n, 0));
1332 if (!key)
1333 return NULL;
1334
1335 value = ast_for_expr(c, CHILD(n, 2));
1336 if (!value)
1337 return NULL;
1338
1339 comps = ast_for_comprehension(c, CHILD(n, 3));
1340 if (!comps)
1341 return NULL;
1342
1343 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1344 }
1345
1346 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1347 ast_for_genexp(struct compiling *c, const node *n)
1348 {
1349 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1350 return ast_for_itercomp(c, n, COMP_GENEXP);
1351 }
1352
1353 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1354 ast_for_setcomp(struct compiling *c, const node *n)
1355 {
1356 assert(TYPE(n) == (dictorsetmaker));
1357 return ast_for_itercomp(c, n, COMP_SETCOMP);
1358 }
1359
1360 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1361 ast_for_atom(struct compiling *c, const node *n)
1362 {
1363 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1364 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1365 */
1366 node *ch = CHILD(n, 0);
1367
1368 switch (TYPE(ch)) {
1369 case NAME: {
1370 /* All names start in Load context, but may later be
1371 changed. */
1372 PyObject *name = NEW_IDENTIFIER(ch);
1373 if (!name)
1374 return NULL;
1375 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1376 }
1377 case STRING: {
1378 PyObject *str = parsestrplus(c, n);
1379 if (!str) {
1380 #ifdef Py_USING_UNICODE
1381 if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1382 PyObject *type, *value, *tback, *errstr;
1383 PyErr_Fetch(&type, &value, &tback);
1384 errstr = PyObject_Str(value);
1385 if (errstr) {
1386 char *s = "";
1387 char buf[128];
1388 s = PyString_AsString(errstr);
1389 PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1390 ast_error(n, buf);
1391 Py_DECREF(errstr);
1392 } else {
1393 ast_error(n, "(unicode error) unknown error");
1394 }
1395 Py_DECREF(type);
1396 Py_DECREF(value);
1397 Py_XDECREF(tback);
1398 }
1399 #endif
1400 return NULL;
1401 }
1402 PyArena_AddPyObject(c->c_arena, str);
1403 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1404 }
1405 case NUMBER: {
1406 PyObject *pynum = parsenumber(c, STR(ch));
1407 if (!pynum)
1408 return NULL;
1409
1410 PyArena_AddPyObject(c->c_arena, pynum);
1411 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1412 }
1413 case LPAR: /* some parenthesized expressions */
1414 ch = CHILD(n, 1);
1415
1416 if (TYPE(ch) == RPAR)
1417 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1418
1419 if (TYPE(ch) == yield_expr)
1420 return ast_for_expr(c, ch);
1421
1422 return ast_for_testlist_comp(c, ch);
1423 case LSQB: /* list (or list comprehension) */
1424 ch = CHILD(n, 1);
1425
1426 if (TYPE(ch) == RSQB)
1427 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1428
1429 REQ(ch, listmaker);
1430 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1431 asdl_seq *elts = seq_for_testlist(c, ch);
1432 if (!elts)
1433 return NULL;
1434
1435 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1436 }
1437 else
1438 return ast_for_listcomp(c, ch);
1439 case LBRACE: {
1440 /* dictorsetmaker:
1441 * (test ':' test (comp_for | (',' test ':' test)* [','])) |
1442 * (test (comp_for | (',' test)* [',']))
1443 */
1444 int i, size;
1445 asdl_seq *keys, *values;
1446
1447 ch = CHILD(n, 1);
1448 if (TYPE(ch) == RBRACE) {
1449 /* it's an empty dict */
1450 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1451 } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1452 /* it's a simple set */
1453 asdl_seq *elts;
1454 size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1455 elts = asdl_seq_new(size, c->c_arena);
1456 if (!elts)
1457 return NULL;
1458 for (i = 0; i < NCH(ch); i += 2) {
1459 expr_ty expression;
1460 expression = ast_for_expr(c, CHILD(ch, i));
1461 if (!expression)
1462 return NULL;
1463 asdl_seq_SET(elts, i / 2, expression);
1464 }
1465 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1466 } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1467 /* it's a set comprehension */
1468 return ast_for_setcomp(c, ch);
1469 } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1470 return ast_for_dictcomp(c, ch);
1471 } else {
1472 /* it's a dict */
1473 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1474 keys = asdl_seq_new(size, c->c_arena);
1475 if (!keys)
1476 return NULL;
1477
1478 values = asdl_seq_new(size, c->c_arena);
1479 if (!values)
1480 return NULL;
1481
1482 for (i = 0; i < NCH(ch); i += 4) {
1483 expr_ty expression;
1484
1485 expression = ast_for_expr(c, CHILD(ch, i));
1486 if (!expression)
1487 return NULL;
1488
1489 asdl_seq_SET(keys, i / 4, expression);
1490
1491 expression = ast_for_expr(c, CHILD(ch, i + 2));
1492 if (!expression)
1493 return NULL;
1494
1495 asdl_seq_SET(values, i / 4, expression);
1496 }
1497 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1498 }
1499 }
1500 case BACKQUOTE: { /* repr */
1501 expr_ty expression;
1502 if (Py_Py3kWarningFlag &&
1503 !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1504 return NULL;
1505 expression = ast_for_testlist(c, CHILD(n, 1));
1506 if (!expression)
1507 return NULL;
1508
1509 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1510 }
1511 default:
1512 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1513 return NULL;
1514 }
1515 }
1516
1517 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1518 ast_for_slice(struct compiling *c, const node *n)
1519 {
1520 node *ch;
1521 expr_ty lower = NULL, upper = NULL, step = NULL;
1522
1523 REQ(n, subscript);
1524
1525 /*
1526 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1527 sliceop: ':' [test]
1528 */
1529 ch = CHILD(n, 0);
1530 if (TYPE(ch) == DOT)
1531 return Ellipsis(c->c_arena);
1532
1533 if (NCH(n) == 1 && TYPE(ch) == test) {
1534 /* 'step' variable hold no significance in terms of being used over
1535 other vars */
1536 step = ast_for_expr(c, ch);
1537 if (!step)
1538 return NULL;
1539
1540 return Index(step, c->c_arena);
1541 }
1542
1543 if (TYPE(ch) == test) {
1544 lower = ast_for_expr(c, ch);
1545 if (!lower)
1546 return NULL;
1547 }
1548
1549 /* If there's an upper bound it's in the second or third position. */
1550 if (TYPE(ch) == COLON) {
1551 if (NCH(n) > 1) {
1552 node *n2 = CHILD(n, 1);
1553
1554 if (TYPE(n2) == test) {
1555 upper = ast_for_expr(c, n2);
1556 if (!upper)
1557 return NULL;
1558 }
1559 }
1560 } else if (NCH(n) > 2) {
1561 node *n2 = CHILD(n, 2);
1562
1563 if (TYPE(n2) == test) {
1564 upper = ast_for_expr(c, n2);
1565 if (!upper)
1566 return NULL;
1567 }
1568 }
1569
1570 ch = CHILD(n, NCH(n) - 1);
1571 if (TYPE(ch) == sliceop) {
1572 if (NCH(ch) == 1) {
1573 /*
1574 This is an extended slice (ie "x[::]") with no expression in the
1575 step field. We set this literally to "None" in order to
1576 disambiguate it from x[:]. (The interpreter might have to call
1577 __getslice__ for x[:], but it must call __getitem__ for x[::].)
1578 */
1579 identifier none = new_identifier("None", c->c_arena);
1580 if (!none)
1581 return NULL;
1582 ch = CHILD(ch, 0);
1583 step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1584 if (!step)
1585 return NULL;
1586 } else {
1587 ch = CHILD(ch, 1);
1588 if (TYPE(ch) == test) {
1589 step = ast_for_expr(c, ch);
1590 if (!step)
1591 return NULL;
1592 }
1593 }
1594 }
1595
1596 return Slice(lower, upper, step, c->c_arena);
1597 }
1598
1599 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1600 ast_for_binop(struct compiling *c, const node *n)
1601 {
1602 /* Must account for a sequence of expressions.
1603 How should A op B op C by represented?
1604 BinOp(BinOp(A, op, B), op, C).
1605 */
1606
1607 int i, nops;
1608 expr_ty expr1, expr2, result;
1609 operator_ty newoperator;
1610
1611 expr1 = ast_for_expr(c, CHILD(n, 0));
1612 if (!expr1)
1613 return NULL;
1614
1615 expr2 = ast_for_expr(c, CHILD(n, 2));
1616 if (!expr2)
1617 return NULL;
1618
1619 newoperator = get_operator(CHILD(n, 1));
1620 if (!newoperator)
1621 return NULL;
1622
1623 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1624 c->c_arena);
1625 if (!result)
1626 return NULL;
1627
1628 nops = (NCH(n) - 1) / 2;
1629 for (i = 1; i < nops; i++) {
1630 expr_ty tmp_result, tmp;
1631 const node* next_oper = CHILD(n, i * 2 + 1);
1632
1633 newoperator = get_operator(next_oper);
1634 if (!newoperator)
1635 return NULL;
1636
1637 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1638 if (!tmp)
1639 return NULL;
1640
1641 tmp_result = BinOp(result, newoperator, tmp,
1642 LINENO(next_oper), next_oper->n_col_offset,
1643 c->c_arena);
1644 if (!tmp_result)
1645 return NULL;
1646 result = tmp_result;
1647 }
1648 return result;
1649 }
1650
1651 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1652 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1653 {
1654 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1655 subscriptlist: subscript (',' subscript)* [',']
1656 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1657 */
1658 REQ(n, trailer);
1659 if (TYPE(CHILD(n, 0)) == LPAR) {
1660 if (NCH(n) == 2)
1661 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1662 n->n_col_offset, c->c_arena);
1663 else
1664 return ast_for_call(c, CHILD(n, 1), left_expr);
1665 }
1666 else if (TYPE(CHILD(n, 0)) == DOT ) {
1667 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1668 if (!attr_id)
1669 return NULL;
1670 return Attribute(left_expr, attr_id, Load,
1671 LINENO(n), n->n_col_offset, c->c_arena);
1672 }
1673 else {
1674 REQ(CHILD(n, 0), LSQB);
1675 REQ(CHILD(n, 2), RSQB);
1676 n = CHILD(n, 1);
1677 if (NCH(n) == 1) {
1678 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1679 if (!slc)
1680 return NULL;
1681 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1682 c->c_arena);
1683 }
1684 else {
1685 /* The grammar is ambiguous here. The ambiguity is resolved
1686 by treating the sequence as a tuple literal if there are
1687 no slice features.
1688 */
1689 int j;
1690 slice_ty slc;
1691 expr_ty e;
1692 bool simple = true;
1693 asdl_seq *slices, *elts;
1694 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1695 if (!slices)
1696 return NULL;
1697 for (j = 0; j < NCH(n); j += 2) {
1698 slc = ast_for_slice(c, CHILD(n, j));
1699 if (!slc)
1700 return NULL;
1701 if (slc->kind != Index_kind)
1702 simple = false;
1703 asdl_seq_SET(slices, j / 2, slc);
1704 }
1705 if (!simple) {
1706 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1707 Load, LINENO(n), n->n_col_offset, c->c_arena);
1708 }
1709 /* extract Index values and put them in a Tuple */
1710 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1711 if (!elts)
1712 return NULL;
1713 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1714 slc = (slice_ty)asdl_seq_GET(slices, j);
1715 assert(slc->kind == Index_kind && slc->v.Index.value);
1716 asdl_seq_SET(elts, j, slc->v.Index.value);
1717 }
1718 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1719 if (!e)
1720 return NULL;
1721 return Subscript(left_expr, Index(e, c->c_arena),
1722 Load, LINENO(n), n->n_col_offset, c->c_arena);
1723 }
1724 }
1725 }
1726
1727 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1728 ast_for_factor(struct compiling *c, const node *n)
1729 {
1730 node *pfactor, *ppower, *patom, *pnum;
1731 expr_ty expression;
1732
1733 /* If the unary - operator is applied to a constant, don't generate
1734 a UNARY_NEGATIVE opcode. Just store the approriate value as a
1735 constant. The peephole optimizer already does something like
1736 this but it doesn't handle the case where the constant is
1737 (sys.maxint - 1). In that case, we want a PyIntObject, not a
1738 PyLongObject.
1739 */
1740 if (TYPE(CHILD(n, 0)) == MINUS &&
1741 NCH(n) == 2 &&
1742 TYPE((pfactor = CHILD(n, 1))) == factor &&
1743 NCH(pfactor) == 1 &&
1744 TYPE((ppower = CHILD(pfactor, 0))) == power &&
1745 NCH(ppower) == 1 &&
1746 TYPE((patom = CHILD(ppower, 0))) == atom &&
1747 TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1748 PyObject *pynum;
1749 char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1750 if (s == NULL)
1751 return NULL;
1752 s[0] = '-';
1753 strcpy(s + 1, STR(pnum));
1754 pynum = parsenumber(c, s);
1755 PyObject_FREE(s);
1756 if (!pynum)
1757 return NULL;
1758
1759 PyArena_AddPyObject(c->c_arena, pynum);
1760 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1761 }
1762
1763 expression = ast_for_expr(c, CHILD(n, 1));
1764 if (!expression)
1765 return NULL;
1766
1767 switch (TYPE(CHILD(n, 0))) {
1768 case PLUS:
1769 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1770 c->c_arena);
1771 case MINUS:
1772 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1773 c->c_arena);
1774 case TILDE:
1775 return UnaryOp(Invert, expression, LINENO(n),
1776 n->n_col_offset, c->c_arena);
1777 }
1778 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1779 TYPE(CHILD(n, 0)));
1780 return NULL;
1781 }
1782
1783 static expr_ty
ast_for_power(struct compiling * c,const node * n)1784 ast_for_power(struct compiling *c, const node *n)
1785 {
1786 /* power: atom trailer* ('**' factor)*
1787 */
1788 int i;
1789 expr_ty e, tmp;
1790 REQ(n, power);
1791 e = ast_for_atom(c, CHILD(n, 0));
1792 if (!e)
1793 return NULL;
1794 if (NCH(n) == 1)
1795 return e;
1796 for (i = 1; i < NCH(n); i++) {
1797 node *ch = CHILD(n, i);
1798 if (TYPE(ch) != trailer)
1799 break;
1800 tmp = ast_for_trailer(c, ch, e);
1801 if (!tmp)
1802 return NULL;
1803 tmp->lineno = e->lineno;
1804 tmp->col_offset = e->col_offset;
1805 e = tmp;
1806 }
1807 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1808 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1809 if (!f)
1810 return NULL;
1811 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1812 if (!tmp)
1813 return NULL;
1814 e = tmp;
1815 }
1816 return e;
1817 }
1818
/* Do not name a variable 'expr'!  Will cause a compile error.
   (The switch below uses `expr` as a case label for the grammar symbol.)
*/

/* Convert any expression-level node into an expr_ty.

   Nodes with a single child are unwrapped in place (via `goto loop`)
   instead of recursing.  Returns NULL on failure.
*/
static expr_ty
ast_for_expr(struct compiling *c, const node *n)
{
    /* handle the full range of simple expressions
       test: or_test ['if' or_test 'else' test] | lambdef
       or_test: and_test ('or' and_test)*
       and_test: not_test ('and' not_test)*
       not_test: 'not' not_test | comparison
       comparison: expr (comp_op expr)*
       expr: xor_expr ('|' xor_expr)*
       xor_expr: and_expr ('^' and_expr)*
       and_expr: shift_expr ('&' shift_expr)*
       shift_expr: arith_expr (('<<'|'>>') arith_expr)*
       arith_expr: term (('+'|'-') term)*
       term: factor (('*'|'/'|'%'|'//') factor)*
       factor: ('+'|'-'|'~') factor | power
       power: atom trailer* ('**' factor)*

       As well as modified versions that exist for backward compatibility,
       to explicitly allow:
       [ x for x in lambda: 0, lambda: 1 ]
       (which would be ambiguous without these extra rules)

       old_test: or_test | old_lambdef
       old_lambdef: 'lambda' [varargslist] ':' old_test

    */

    asdl_seq *seq;
    int i;

 loop:
    switch (TYPE(n)) {
        case test:
        case old_test:
            if (TYPE(CHILD(n, 0)) == lambdef ||
                TYPE(CHILD(n, 0)) == old_lambdef)
                return ast_for_lambdef(c, CHILD(n, 0));
            else if (NCH(n) > 1)
                return ast_for_ifexpr(c, n);
            /* Fallthrough: a single-child test is unwrapped below */
        case or_test:
        case and_test:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            /* Operands sit at the even child positions; the keyword at
               position 1 tells 'and' from 'or'. */
            seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
            if (!seq)
                return NULL;
            for (i = 0; i < NCH(n); i += 2) {
                expr_ty e = ast_for_expr(c, CHILD(n, i));
                if (!e)
                    return NULL;
                asdl_seq_SET(seq, i / 2, e);
            }
            if (!strcmp(STR(CHILD(n, 1)), "and"))
                return BoolOp(And, seq, LINENO(n), n->n_col_offset,
                              c->c_arena);
            assert(!strcmp(STR(CHILD(n, 1)), "or"));
            return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
        case not_test:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                expr_ty expression = ast_for_expr(c, CHILD(n, 1));
                if (!expression)
                    return NULL;

                return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
                               c->c_arena);
            }
        case comparison:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            else {
                expr_ty expression;
                asdl_int_seq *ops;
                asdl_seq *cmps;
                ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
                if (!ops)
                    return NULL;
                cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
                if (!cmps) {
                    return NULL;
                }
                /* Operators are at the odd positions, each followed by
                   its right-hand operand. */
                for (i = 1; i < NCH(n); i += 2) {
                    cmpop_ty newoperator;

                    newoperator = ast_for_comp_op(c, CHILD(n, i));
                    if (!newoperator) {
                        return NULL;
                    }

                    expression = ast_for_expr(c, CHILD(n, i + 1));
                    if (!expression) {
                        return NULL;
                    }

                    asdl_seq_SET(ops, i / 2, newoperator);
                    asdl_seq_SET(cmps, i / 2, expression);
                }
                /* The leftmost operand is converted last. */
                expression = ast_for_expr(c, CHILD(n, 0));
                if (!expression) {
                    return NULL;
                }

                return Compare(expression, ops, cmps, LINENO(n),
                               n->n_col_offset, c->c_arena);
            }
            break;

        /* The next six cases all handle BinOps.  The main body of code
           is the same in each case, but the switch turned inside out to
           reuse the code for each type of operator.
         */
        case expr:
        case xor_expr:
        case and_expr:
        case shift_expr:
        case arith_expr:
        case term:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_binop(c, n);
        case yield_expr: {
            /* A bare 'yield' has no value; exp then stays NULL. */
            expr_ty exp = NULL;
            if (NCH(n) == 2) {
                exp = ast_for_testlist(c, CHILD(n, 1));
                if (!exp)
                    return NULL;
            }
            return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
        }
        case factor:
            if (NCH(n) == 1) {
                n = CHILD(n, 0);
                goto loop;
            }
            return ast_for_factor(c, n);
        case power:
            return ast_for_power(c, n);
        default:
            PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
            return NULL;
    }
    /* should never get here unless an error is set */
    return NULL;
}
1977
1978 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1979 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1980 {
1981 /*
1982 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1983 | '**' test)
1984 argument: [test '='] test [comp_for] # Really [keyword '='] test
1985 */
1986
1987 int i, nargs, nkeywords, ngens;
1988 asdl_seq *args;
1989 asdl_seq *keywords;
1990 expr_ty vararg = NULL, kwarg = NULL;
1991
1992 REQ(n, arglist);
1993
1994 nargs = 0;
1995 nkeywords = 0;
1996 ngens = 0;
1997 for (i = 0; i < NCH(n); i++) {
1998 node *ch = CHILD(n, i);
1999 if (TYPE(ch) == argument) {
2000 if (NCH(ch) == 1)
2001 nargs++;
2002 else if (TYPE(CHILD(ch, 1)) == comp_for)
2003 ngens++;
2004 else
2005 nkeywords++;
2006 }
2007 }
2008 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2009 ast_error(n, "Generator expression must be parenthesized "
2010 "if not sole argument");
2011 return NULL;
2012 }
2013
2014 if (nargs + nkeywords + ngens > 255) {
2015 ast_error(n, "more than 255 arguments");
2016 return NULL;
2017 }
2018
2019 args = asdl_seq_new(nargs + ngens, c->c_arena);
2020 if (!args)
2021 return NULL;
2022 keywords = asdl_seq_new(nkeywords, c->c_arena);
2023 if (!keywords)
2024 return NULL;
2025 nargs = 0;
2026 nkeywords = 0;
2027 for (i = 0; i < NCH(n); i++) {
2028 node *ch = CHILD(n, i);
2029 if (TYPE(ch) == argument) {
2030 expr_ty e;
2031 if (NCH(ch) == 1) {
2032 if (nkeywords) {
2033 ast_error(CHILD(ch, 0),
2034 "non-keyword arg after keyword arg");
2035 return NULL;
2036 }
2037 if (vararg) {
2038 ast_error(CHILD(ch, 0),
2039 "only named arguments may follow *expression");
2040 return NULL;
2041 }
2042 e = ast_for_expr(c, CHILD(ch, 0));
2043 if (!e)
2044 return NULL;
2045 asdl_seq_SET(args, nargs++, e);
2046 }
2047 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2048 e = ast_for_genexp(c, ch);
2049 if (!e)
2050 return NULL;
2051 asdl_seq_SET(args, nargs++, e);
2052 }
2053 else {
2054 keyword_ty kw;
2055 identifier key;
2056 int k;
2057 char *tmp;
2058
2059 /* CHILD(ch, 0) is test, but must be an identifier? */
2060 e = ast_for_expr(c, CHILD(ch, 0));
2061 if (!e)
2062 return NULL;
2063 /* f(lambda x: x[0] = 3) ends up getting parsed with
2064 * LHS test = lambda x: x[0], and RHS test = 3.
2065 * SF bug 132313 points out that complaining about a keyword
2066 * then is very confusing.
2067 */
2068 if (e->kind == Lambda_kind) {
2069 ast_error(CHILD(ch, 0),
2070 "lambda cannot contain assignment");
2071 return NULL;
2072 } else if (e->kind != Name_kind) {
2073 ast_error(CHILD(ch, 0), "keyword can't be an expression");
2074 return NULL;
2075 }
2076 key = e->v.Name.id;
2077 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2078 return NULL;
2079 for (k = 0; k < nkeywords; k++) {
2080 tmp = PyString_AS_STRING(
2081 ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2082 if (!strcmp(tmp, PyString_AS_STRING(key))) {
2083 ast_error(CHILD(ch, 0), "keyword argument repeated");
2084 return NULL;
2085 }
2086 }
2087 e = ast_for_expr(c, CHILD(ch, 2));
2088 if (!e)
2089 return NULL;
2090 kw = keyword(key, e, c->c_arena);
2091 if (!kw)
2092 return NULL;
2093 asdl_seq_SET(keywords, nkeywords++, kw);
2094 }
2095 }
2096 else if (TYPE(ch) == STAR) {
2097 vararg = ast_for_expr(c, CHILD(n, i+1));
2098 if (!vararg)
2099 return NULL;
2100 i++;
2101 }
2102 else if (TYPE(ch) == DOUBLESTAR) {
2103 kwarg = ast_for_expr(c, CHILD(n, i+1));
2104 if (!kwarg)
2105 return NULL;
2106 i++;
2107 }
2108 }
2109
2110 return Call(func, args, keywords, vararg, kwarg, func->lineno,
2111 func->col_offset, c->c_arena);
2112 }
2113
2114 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2115 ast_for_testlist(struct compiling *c, const node* n)
2116 {
2117 /* testlist_comp: test (',' test)* [','] */
2118 /* testlist: test (',' test)* [','] */
2119 /* testlist_safe: test (',' test)+ [','] */
2120 /* testlist1: test (',' test)* */
2121 assert(NCH(n) > 0);
2122 if (TYPE(n) == testlist_comp) {
2123 if (NCH(n) > 1)
2124 assert(TYPE(CHILD(n, 1)) != comp_for);
2125 }
2126 else {
2127 assert(TYPE(n) == testlist ||
2128 TYPE(n) == testlist_safe ||
2129 TYPE(n) == testlist1);
2130 }
2131 if (NCH(n) == 1)
2132 return ast_for_expr(c, CHILD(n, 0));
2133 else {
2134 asdl_seq *tmp = seq_for_testlist(c, n);
2135 if (!tmp)
2136 return NULL;
2137 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2138 }
2139 }
2140
2141 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2142 ast_for_testlist_comp(struct compiling *c, const node* n)
2143 {
2144 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2145 /* argument: test [ comp_for ] */
2146 assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2147 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2148 return ast_for_genexp(c, n);
2149 return ast_for_testlist(c, n);
2150 }
2151
2152 /* like ast_for_testlist() but returns a sequence */
2153 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2154 ast_for_class_bases(struct compiling *c, const node* n)
2155 {
2156 /* testlist: test (',' test)* [','] */
2157 assert(NCH(n) > 0);
2158 REQ(n, testlist);
2159 if (NCH(n) == 1) {
2160 expr_ty base;
2161 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2162 if (!bases)
2163 return NULL;
2164 base = ast_for_expr(c, CHILD(n, 0));
2165 if (!base)
2166 return NULL;
2167 asdl_seq_SET(bases, 0, base);
2168 return bases;
2169 }
2170
2171 return seq_for_testlist(c, n);
2172 }
2173
2174 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2175 ast_for_expr_stmt(struct compiling *c, const node *n)
2176 {
2177 REQ(n, expr_stmt);
2178 /* expr_stmt: testlist (augassign (yield_expr|testlist)
2179 | ('=' (yield_expr|testlist))*)
2180 testlist: test (',' test)* [',']
2181 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2182 | '<<=' | '>>=' | '**=' | '//='
2183 test: ... here starts the operator precedence dance
2184 */
2185
2186 if (NCH(n) == 1) {
2187 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2188 if (!e)
2189 return NULL;
2190
2191 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2192 }
2193 else if (TYPE(CHILD(n, 1)) == augassign) {
2194 expr_ty expr1, expr2;
2195 operator_ty newoperator;
2196 node *ch = CHILD(n, 0);
2197
2198 expr1 = ast_for_testlist(c, ch);
2199 if (!expr1)
2200 return NULL;
2201 if(!set_context(c, expr1, Store, ch))
2202 return NULL;
2203 /* set_context checks that most expressions are not the left side.
2204 Augmented assignments can only have a name, a subscript, or an
2205 attribute on the left, though, so we have to explicitly check for
2206 those. */
2207 switch (expr1->kind) {
2208 case Name_kind:
2209 case Attribute_kind:
2210 case Subscript_kind:
2211 break;
2212 default:
2213 ast_error(ch, "illegal expression for augmented assignment");
2214 return NULL;
2215 }
2216
2217 ch = CHILD(n, 2);
2218 if (TYPE(ch) == testlist)
2219 expr2 = ast_for_testlist(c, ch);
2220 else
2221 expr2 = ast_for_expr(c, ch);
2222 if (!expr2)
2223 return NULL;
2224
2225 newoperator = ast_for_augassign(c, CHILD(n, 1));
2226 if (!newoperator)
2227 return NULL;
2228
2229 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2230 c->c_arena);
2231 }
2232 else {
2233 int i;
2234 asdl_seq *targets;
2235 node *value;
2236 expr_ty expression;
2237
2238 /* a normal assignment */
2239 REQ(CHILD(n, 1), EQUAL);
2240 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2241 if (!targets)
2242 return NULL;
2243 for (i = 0; i < NCH(n) - 2; i += 2) {
2244 expr_ty e;
2245 node *ch = CHILD(n, i);
2246 if (TYPE(ch) == yield_expr) {
2247 ast_error(ch, "assignment to yield expression not possible");
2248 return NULL;
2249 }
2250 e = ast_for_testlist(c, ch);
2251 if (!e)
2252 return NULL;
2253
2254 /* set context to assign */
2255 if (!set_context(c, e, Store, CHILD(n, i)))
2256 return NULL;
2257
2258 asdl_seq_SET(targets, i / 2, e);
2259 }
2260 value = CHILD(n, NCH(n) - 1);
2261 if (TYPE(value) == testlist)
2262 expression = ast_for_testlist(c, value);
2263 else
2264 expression = ast_for_expr(c, value);
2265 if (!expression)
2266 return NULL;
2267 return Assign(targets, expression, LINENO(n), n->n_col_offset,
2268 c->c_arena);
2269 }
2270 }
2271
2272 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2273 ast_for_print_stmt(struct compiling *c, const node *n)
2274 {
2275 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2276 | '>>' test [ (',' test)+ [','] ] )
2277 */
2278 expr_ty dest = NULL, expression;
2279 asdl_seq *seq = NULL;
2280 bool nl;
2281 int i, j, values_count, start = 1;
2282
2283 REQ(n, print_stmt);
2284 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2285 dest = ast_for_expr(c, CHILD(n, 2));
2286 if (!dest)
2287 return NULL;
2288 start = 4;
2289 }
2290 values_count = (NCH(n) + 1 - start) / 2;
2291 if (values_count) {
2292 seq = asdl_seq_new(values_count, c->c_arena);
2293 if (!seq)
2294 return NULL;
2295 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2296 expression = ast_for_expr(c, CHILD(n, i));
2297 if (!expression)
2298 return NULL;
2299 asdl_seq_SET(seq, j, expression);
2300 }
2301 }
2302 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2303 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2304 }
2305
2306 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2307 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2308 {
2309 asdl_seq *seq;
2310 int i;
2311 expr_ty e;
2312
2313 REQ(n, exprlist);
2314
2315 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2316 if (!seq)
2317 return NULL;
2318 for (i = 0; i < NCH(n); i += 2) {
2319 e = ast_for_expr(c, CHILD(n, i));
2320 if (!e)
2321 return NULL;
2322 asdl_seq_SET(seq, i / 2, e);
2323 if (context && !set_context(c, e, context, CHILD(n, i)))
2324 return NULL;
2325 }
2326 return seq;
2327 }
2328
2329 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2330 ast_for_del_stmt(struct compiling *c, const node *n)
2331 {
2332 asdl_seq *expr_list;
2333
2334 /* del_stmt: 'del' exprlist */
2335 REQ(n, del_stmt);
2336
2337 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2338 if (!expr_list)
2339 return NULL;
2340 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2341 }
2342
2343 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2344 ast_for_flow_stmt(struct compiling *c, const node *n)
2345 {
2346 /*
2347 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2348 | yield_stmt
2349 break_stmt: 'break'
2350 continue_stmt: 'continue'
2351 return_stmt: 'return' [testlist]
2352 yield_stmt: yield_expr
2353 yield_expr: 'yield' testlist
2354 raise_stmt: 'raise' [test [',' test [',' test]]]
2355 */
2356 node *ch;
2357
2358 REQ(n, flow_stmt);
2359 ch = CHILD(n, 0);
2360 switch (TYPE(ch)) {
2361 case break_stmt:
2362 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2363 case continue_stmt:
2364 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2365 case yield_stmt: { /* will reduce to yield_expr */
2366 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2367 if (!exp)
2368 return NULL;
2369 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2370 }
2371 case return_stmt:
2372 if (NCH(ch) == 1)
2373 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2374 else {
2375 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2376 if (!expression)
2377 return NULL;
2378 return Return(expression, LINENO(n), n->n_col_offset,
2379 c->c_arena);
2380 }
2381 case raise_stmt:
2382 if (NCH(ch) == 1)
2383 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2384 c->c_arena);
2385 else if (NCH(ch) == 2) {
2386 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2387 if (!expression)
2388 return NULL;
2389 return Raise(expression, NULL, NULL, LINENO(n),
2390 n->n_col_offset, c->c_arena);
2391 }
2392 else if (NCH(ch) == 4) {
2393 expr_ty expr1, expr2;
2394
2395 expr1 = ast_for_expr(c, CHILD(ch, 1));
2396 if (!expr1)
2397 return NULL;
2398 expr2 = ast_for_expr(c, CHILD(ch, 3));
2399 if (!expr2)
2400 return NULL;
2401
2402 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2403 c->c_arena);
2404 }
2405 else if (NCH(ch) == 6) {
2406 expr_ty expr1, expr2, expr3;
2407
2408 expr1 = ast_for_expr(c, CHILD(ch, 1));
2409 if (!expr1)
2410 return NULL;
2411 expr2 = ast_for_expr(c, CHILD(ch, 3));
2412 if (!expr2)
2413 return NULL;
2414 expr3 = ast_for_expr(c, CHILD(ch, 5));
2415 if (!expr3)
2416 return NULL;
2417
2418 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2419 c->c_arena);
2420 }
2421 default:
2422 PyErr_Format(PyExc_SystemError,
2423 "unexpected flow_stmt: %d", TYPE(ch));
2424 return NULL;
2425 }
2426
2427 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2428 return NULL;
2429 }
2430
2431 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2432 alias_for_import_name(struct compiling *c, const node *n, int store)
2433 {
2434 /*
2435 import_as_name: NAME ['as' NAME]
2436 dotted_as_name: dotted_name ['as' NAME]
2437 dotted_name: NAME ('.' NAME)*
2438 */
2439 PyObject *str, *name;
2440
2441 loop:
2442 switch (TYPE(n)) {
2443 case import_as_name: {
2444 node *name_node = CHILD(n, 0);
2445 str = NULL;
2446 if (NCH(n) == 3) {
2447 node *str_node = CHILD(n, 2);
2448 if (store && !forbidden_check(c, str_node, STR(str_node)))
2449 return NULL;
2450 str = NEW_IDENTIFIER(str_node);
2451 if (!str)
2452 return NULL;
2453 }
2454 else {
2455 if (!forbidden_check(c, name_node, STR(name_node)))
2456 return NULL;
2457 }
2458 name = NEW_IDENTIFIER(name_node);
2459 if (!name)
2460 return NULL;
2461 return alias(name, str, c->c_arena);
2462 }
2463 case dotted_as_name:
2464 if (NCH(n) == 1) {
2465 n = CHILD(n, 0);
2466 goto loop;
2467 }
2468 else {
2469 node *asname_node = CHILD(n, 2);
2470 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2471 if (!a)
2472 return NULL;
2473 assert(!a->asname);
2474 if (!forbidden_check(c, asname_node, STR(asname_node)))
2475 return NULL;
2476 a->asname = NEW_IDENTIFIER(asname_node);
2477 if (!a->asname)
2478 return NULL;
2479 return a;
2480 }
2481 break;
2482 case dotted_name:
2483 if (NCH(n) == 1) {
2484 node *name_node = CHILD(n, 0);
2485 if (store && !forbidden_check(c, name_node, STR(name_node)))
2486 return NULL;
2487 name = NEW_IDENTIFIER(name_node);
2488 if (!name)
2489 return NULL;
2490 return alias(name, NULL, c->c_arena);
2491 }
2492 else {
2493 /* Create a string of the form "a.b.c" */
2494 int i;
2495 size_t len;
2496 char *s;
2497
2498 len = 0;
2499 for (i = 0; i < NCH(n); i += 2)
2500 /* length of string plus one for the dot */
2501 len += strlen(STR(CHILD(n, i))) + 1;
2502 len--; /* the last name doesn't have a dot */
2503 str = PyString_FromStringAndSize(NULL, len);
2504 if (!str)
2505 return NULL;
2506 s = PyString_AS_STRING(str);
2507 if (!s)
2508 return NULL;
2509 for (i = 0; i < NCH(n); i += 2) {
2510 char *sch = STR(CHILD(n, i));
2511 strcpy(s, STR(CHILD(n, i)));
2512 s += strlen(sch);
2513 *s++ = '.';
2514 }
2515 --s;
2516 *s = '\0';
2517 PyString_InternInPlace(&str);
2518 PyArena_AddPyObject(c->c_arena, str);
2519 return alias(str, NULL, c->c_arena);
2520 }
2521 break;
2522 case STAR:
2523 str = PyString_InternFromString("*");
2524 if (!str)
2525 return NULL;
2526 PyArena_AddPyObject(c->c_arena, str);
2527 return alias(str, NULL, c->c_arena);
2528 default:
2529 PyErr_Format(PyExc_SystemError,
2530 "unexpected import name: %d", TYPE(n));
2531 return NULL;
2532 }
2533
2534 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2535 return NULL;
2536 }
2537
2538 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2539 ast_for_import_stmt(struct compiling *c, const node *n)
2540 {
2541 /*
2542 import_stmt: import_name | import_from
2543 import_name: 'import' dotted_as_names
2544 import_from: 'from' ('.'* dotted_name | '.') 'import'
2545 ('*' | '(' import_as_names ')' | import_as_names)
2546 */
2547 int lineno;
2548 int col_offset;
2549 int i;
2550 asdl_seq *aliases;
2551
2552 REQ(n, import_stmt);
2553 lineno = LINENO(n);
2554 col_offset = n->n_col_offset;
2555 n = CHILD(n, 0);
2556 if (TYPE(n) == import_name) {
2557 n = CHILD(n, 1);
2558 REQ(n, dotted_as_names);
2559 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2560 if (!aliases)
2561 return NULL;
2562 for (i = 0; i < NCH(n); i += 2) {
2563 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2564 if (!import_alias)
2565 return NULL;
2566 asdl_seq_SET(aliases, i / 2, import_alias);
2567 }
2568 return Import(aliases, lineno, col_offset, c->c_arena);
2569 }
2570 else if (TYPE(n) == import_from) {
2571 int n_children;
2572 int idx, ndots = 0;
2573 alias_ty mod = NULL;
2574 identifier modname = NULL;
2575
2576 /* Count the number of dots (for relative imports) and check for the
2577 optional module name */
2578 for (idx = 1; idx < NCH(n); idx++) {
2579 if (TYPE(CHILD(n, idx)) == dotted_name) {
2580 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2581 if (!mod)
2582 return NULL;
2583 idx++;
2584 break;
2585 } else if (TYPE(CHILD(n, idx)) != DOT) {
2586 break;
2587 }
2588 ndots++;
2589 }
2590 idx++; /* skip over the 'import' keyword */
2591 switch (TYPE(CHILD(n, idx))) {
2592 case STAR:
2593 /* from ... import * */
2594 n = CHILD(n, idx);
2595 n_children = 1;
2596 break;
2597 case LPAR:
2598 /* from ... import (x, y, z) */
2599 n = CHILD(n, idx + 1);
2600 n_children = NCH(n);
2601 break;
2602 case import_as_names:
2603 /* from ... import x, y, z */
2604 n = CHILD(n, idx);
2605 n_children = NCH(n);
2606 if (n_children % 2 == 0) {
2607 ast_error(n, "trailing comma not allowed without"
2608 " surrounding parentheses");
2609 return NULL;
2610 }
2611 break;
2612 default:
2613 ast_error(n, "Unexpected node-type in from-import");
2614 return NULL;
2615 }
2616
2617 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2618 if (!aliases)
2619 return NULL;
2620
2621 /* handle "from ... import *" special b/c there's no children */
2622 if (TYPE(n) == STAR) {
2623 alias_ty import_alias = alias_for_import_name(c, n, 1);
2624 if (!import_alias)
2625 return NULL;
2626 asdl_seq_SET(aliases, 0, import_alias);
2627 }
2628 else {
2629 for (i = 0; i < NCH(n); i += 2) {
2630 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2631 if (!import_alias)
2632 return NULL;
2633 asdl_seq_SET(aliases, i / 2, import_alias);
2634 }
2635 }
2636 if (mod != NULL)
2637 modname = mod->name;
2638 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2639 c->c_arena);
2640 }
2641 PyErr_Format(PyExc_SystemError,
2642 "unknown import statement: starts with command '%s'",
2643 STR(CHILD(n, 0)));
2644 return NULL;
2645 }
2646
2647 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2648 ast_for_global_stmt(struct compiling *c, const node *n)
2649 {
2650 /* global_stmt: 'global' NAME (',' NAME)* */
2651 identifier name;
2652 asdl_seq *s;
2653 int i;
2654
2655 REQ(n, global_stmt);
2656 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2657 if (!s)
2658 return NULL;
2659 for (i = 1; i < NCH(n); i += 2) {
2660 name = NEW_IDENTIFIER(CHILD(n, i));
2661 if (!name)
2662 return NULL;
2663 asdl_seq_SET(s, i / 2, name);
2664 }
2665 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2666 }
2667
2668 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2669 ast_for_exec_stmt(struct compiling *c, const node *n)
2670 {
2671 expr_ty expr1, globals = NULL, locals = NULL;
2672 int n_children = NCH(n);
2673 if (n_children != 2 && n_children != 4 && n_children != 6) {
2674 PyErr_Format(PyExc_SystemError,
2675 "poorly formed 'exec' statement: %d parts to statement",
2676 n_children);
2677 return NULL;
2678 }
2679
2680 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2681 REQ(n, exec_stmt);
2682 expr1 = ast_for_expr(c, CHILD(n, 1));
2683 if (!expr1)
2684 return NULL;
2685
2686 if (expr1->kind == Tuple_kind && n_children < 4 &&
2687 (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2688 asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2689 /* Backwards compatibility: passing exec args as a tuple */
2690 globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2691 if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2692 locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2693 }
2694 expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2695 }
2696
2697 if (n_children >= 4) {
2698 globals = ast_for_expr(c, CHILD(n, 3));
2699 if (!globals)
2700 return NULL;
2701 }
2702 if (n_children == 6) {
2703 locals = ast_for_expr(c, CHILD(n, 5));
2704 if (!locals)
2705 return NULL;
2706 }
2707
2708 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2709 c->c_arena);
2710 }
2711
2712 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2713 ast_for_assert_stmt(struct compiling *c, const node *n)
2714 {
2715 /* assert_stmt: 'assert' test [',' test] */
2716 REQ(n, assert_stmt);
2717 if (NCH(n) == 2) {
2718 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2719 if (!expression)
2720 return NULL;
2721 return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2722 c->c_arena);
2723 }
2724 else if (NCH(n) == 4) {
2725 expr_ty expr1, expr2;
2726
2727 expr1 = ast_for_expr(c, CHILD(n, 1));
2728 if (!expr1)
2729 return NULL;
2730 expr2 = ast_for_expr(c, CHILD(n, 3));
2731 if (!expr2)
2732 return NULL;
2733
2734 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2735 }
2736 PyErr_Format(PyExc_SystemError,
2737 "improper number of parts to 'assert' statement: %d",
2738 NCH(n));
2739 return NULL;
2740 }
2741
2742 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2743 ast_for_suite(struct compiling *c, const node *n)
2744 {
2745 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2746 asdl_seq *seq;
2747 stmt_ty s;
2748 int i, total, num, end, pos = 0;
2749 node *ch;
2750
2751 REQ(n, suite);
2752
2753 total = num_stmts(n);
2754 seq = asdl_seq_new(total, c->c_arena);
2755 if (!seq)
2756 return NULL;
2757 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2758 n = CHILD(n, 0);
2759 /* simple_stmt always ends with a NEWLINE,
2760 and may have a trailing SEMI
2761 */
2762 end = NCH(n) - 1;
2763 if (TYPE(CHILD(n, end - 1)) == SEMI)
2764 end--;
2765 /* loop by 2 to skip semi-colons */
2766 for (i = 0; i < end; i += 2) {
2767 ch = CHILD(n, i);
2768 s = ast_for_stmt(c, ch);
2769 if (!s)
2770 return NULL;
2771 asdl_seq_SET(seq, pos++, s);
2772 }
2773 }
2774 else {
2775 for (i = 2; i < (NCH(n) - 1); i++) {
2776 ch = CHILD(n, i);
2777 REQ(ch, stmt);
2778 num = num_stmts(ch);
2779 if (num == 1) {
2780 /* small_stmt or compound_stmt with only one child */
2781 s = ast_for_stmt(c, ch);
2782 if (!s)
2783 return NULL;
2784 asdl_seq_SET(seq, pos++, s);
2785 }
2786 else {
2787 int j;
2788 ch = CHILD(ch, 0);
2789 REQ(ch, simple_stmt);
2790 for (j = 0; j < NCH(ch); j += 2) {
2791 /* statement terminates with a semi-colon ';' */
2792 if (NCH(CHILD(ch, j)) == 0) {
2793 assert((j + 1) == NCH(ch));
2794 break;
2795 }
2796 s = ast_for_stmt(c, CHILD(ch, j));
2797 if (!s)
2798 return NULL;
2799 asdl_seq_SET(seq, pos++, s);
2800 }
2801 }
2802 }
2803 }
2804 assert(pos == seq->size);
2805 return seq;
2806 }
2807
2808 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2809 ast_for_if_stmt(struct compiling *c, const node *n)
2810 {
2811 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2812 ['else' ':' suite]
2813 */
2814 char *s;
2815
2816 REQ(n, if_stmt);
2817
2818 if (NCH(n) == 4) {
2819 expr_ty expression;
2820 asdl_seq *suite_seq;
2821
2822 expression = ast_for_expr(c, CHILD(n, 1));
2823 if (!expression)
2824 return NULL;
2825 suite_seq = ast_for_suite(c, CHILD(n, 3));
2826 if (!suite_seq)
2827 return NULL;
2828
2829 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2830 c->c_arena);
2831 }
2832
2833 s = STR(CHILD(n, 4));
2834 /* s[2], the third character in the string, will be
2835 's' for el_s_e, or
2836 'i' for el_i_f
2837 */
2838 if (s[2] == 's') {
2839 expr_ty expression;
2840 asdl_seq *seq1, *seq2;
2841
2842 expression = ast_for_expr(c, CHILD(n, 1));
2843 if (!expression)
2844 return NULL;
2845 seq1 = ast_for_suite(c, CHILD(n, 3));
2846 if (!seq1)
2847 return NULL;
2848 seq2 = ast_for_suite(c, CHILD(n, 6));
2849 if (!seq2)
2850 return NULL;
2851
2852 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2853 c->c_arena);
2854 }
2855 else if (s[2] == 'i') {
2856 int i, n_elif, has_else = 0;
2857 expr_ty expression;
2858 asdl_seq *suite_seq;
2859 asdl_seq *orelse = NULL;
2860 n_elif = NCH(n) - 4;
2861 /* must reference the child n_elif+1 since 'else' token is third,
2862 not fourth, child from the end. */
2863 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2864 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2865 has_else = 1;
2866 n_elif -= 3;
2867 }
2868 n_elif /= 4;
2869
2870 if (has_else) {
2871 asdl_seq *suite_seq2;
2872
2873 orelse = asdl_seq_new(1, c->c_arena);
2874 if (!orelse)
2875 return NULL;
2876 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2877 if (!expression)
2878 return NULL;
2879 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2880 if (!suite_seq)
2881 return NULL;
2882 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2883 if (!suite_seq2)
2884 return NULL;
2885
2886 asdl_seq_SET(orelse, 0,
2887 If(expression, suite_seq, suite_seq2,
2888 LINENO(CHILD(n, NCH(n) - 6)),
2889 CHILD(n, NCH(n) - 6)->n_col_offset,
2890 c->c_arena));
2891 /* the just-created orelse handled the last elif */
2892 n_elif--;
2893 }
2894
2895 for (i = 0; i < n_elif; i++) {
2896 int off = 5 + (n_elif - i - 1) * 4;
2897 asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2898 if (!newobj)
2899 return NULL;
2900 expression = ast_for_expr(c, CHILD(n, off));
2901 if (!expression)
2902 return NULL;
2903 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2904 if (!suite_seq)
2905 return NULL;
2906
2907 asdl_seq_SET(newobj, 0,
2908 If(expression, suite_seq, orelse,
2909 LINENO(CHILD(n, off)),
2910 CHILD(n, off)->n_col_offset, c->c_arena));
2911 orelse = newobj;
2912 }
2913 expression = ast_for_expr(c, CHILD(n, 1));
2914 if (!expression)
2915 return NULL;
2916 suite_seq = ast_for_suite(c, CHILD(n, 3));
2917 if (!suite_seq)
2918 return NULL;
2919 return If(expression, suite_seq, orelse,
2920 LINENO(n), n->n_col_offset, c->c_arena);
2921 }
2922
2923 PyErr_Format(PyExc_SystemError,
2924 "unexpected token in 'if' statement: %s", s);
2925 return NULL;
2926 }
2927
2928 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2929 ast_for_while_stmt(struct compiling *c, const node *n)
2930 {
2931 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2932 REQ(n, while_stmt);
2933
2934 if (NCH(n) == 4) {
2935 expr_ty expression;
2936 asdl_seq *suite_seq;
2937
2938 expression = ast_for_expr(c, CHILD(n, 1));
2939 if (!expression)
2940 return NULL;
2941 suite_seq = ast_for_suite(c, CHILD(n, 3));
2942 if (!suite_seq)
2943 return NULL;
2944 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2945 c->c_arena);
2946 }
2947 else if (NCH(n) == 7) {
2948 expr_ty expression;
2949 asdl_seq *seq1, *seq2;
2950
2951 expression = ast_for_expr(c, CHILD(n, 1));
2952 if (!expression)
2953 return NULL;
2954 seq1 = ast_for_suite(c, CHILD(n, 3));
2955 if (!seq1)
2956 return NULL;
2957 seq2 = ast_for_suite(c, CHILD(n, 6));
2958 if (!seq2)
2959 return NULL;
2960
2961 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2962 c->c_arena);
2963 }
2964
2965 PyErr_Format(PyExc_SystemError,
2966 "wrong number of tokens for 'while' statement: %d",
2967 NCH(n));
2968 return NULL;
2969 }
2970
2971 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2972 ast_for_for_stmt(struct compiling *c, const node *n)
2973 {
2974 asdl_seq *_target, *seq = NULL, *suite_seq;
2975 expr_ty expression;
2976 expr_ty target, first;
2977 const node *node_target;
2978 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2979 REQ(n, for_stmt);
2980
2981 if (NCH(n) == 9) {
2982 seq = ast_for_suite(c, CHILD(n, 8));
2983 if (!seq)
2984 return NULL;
2985 }
2986
2987 node_target = CHILD(n, 1);
2988 _target = ast_for_exprlist(c, node_target, Store);
2989 if (!_target)
2990 return NULL;
2991 /* Check the # of children rather than the length of _target, since
2992 for x, in ... has 1 element in _target, but still requires a Tuple. */
2993 first = (expr_ty)asdl_seq_GET(_target, 0);
2994 if (NCH(node_target) == 1)
2995 target = first;
2996 else
2997 target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2998
2999 expression = ast_for_testlist(c, CHILD(n, 3));
3000 if (!expression)
3001 return NULL;
3002 suite_seq = ast_for_suite(c, CHILD(n, 5));
3003 if (!suite_seq)
3004 return NULL;
3005
3006 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3007 c->c_arena);
3008 }
3009
3010 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3011 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3012 {
3013 /* except_clause: 'except' [test [(',' | 'as') test]] */
3014 REQ(exc, except_clause);
3015 REQ(body, suite);
3016
3017 if (NCH(exc) == 1) {
3018 asdl_seq *suite_seq = ast_for_suite(c, body);
3019 if (!suite_seq)
3020 return NULL;
3021
3022 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3023 exc->n_col_offset, c->c_arena);
3024 }
3025 else if (NCH(exc) == 2) {
3026 expr_ty expression;
3027 asdl_seq *suite_seq;
3028
3029 expression = ast_for_expr(c, CHILD(exc, 1));
3030 if (!expression)
3031 return NULL;
3032 suite_seq = ast_for_suite(c, body);
3033 if (!suite_seq)
3034 return NULL;
3035
3036 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3037 exc->n_col_offset, c->c_arena);
3038 }
3039 else if (NCH(exc) == 4) {
3040 asdl_seq *suite_seq;
3041 expr_ty expression;
3042 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3043 if (!e)
3044 return NULL;
3045 if (!set_context(c, e, Store, CHILD(exc, 3)))
3046 return NULL;
3047 expression = ast_for_expr(c, CHILD(exc, 1));
3048 if (!expression)
3049 return NULL;
3050 suite_seq = ast_for_suite(c, body);
3051 if (!suite_seq)
3052 return NULL;
3053
3054 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3055 exc->n_col_offset, c->c_arena);
3056 }
3057
3058 PyErr_Format(PyExc_SystemError,
3059 "wrong number of children for 'except' clause: %d",
3060 NCH(exc));
3061 return NULL;
3062 }
3063
3064 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3065 ast_for_try_stmt(struct compiling *c, const node *n)
3066 {
3067 const int nch = NCH(n);
3068 int n_except = (nch - 3)/3;
3069 asdl_seq *body, *orelse = NULL, *finally = NULL;
3070
3071 REQ(n, try_stmt);
3072
3073 body = ast_for_suite(c, CHILD(n, 2));
3074 if (body == NULL)
3075 return NULL;
3076
3077 if (TYPE(CHILD(n, nch - 3)) == NAME) {
3078 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3079 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3080 /* we can assume it's an "else",
3081 because nch >= 9 for try-else-finally and
3082 it would otherwise have a type of except_clause */
3083 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3084 if (orelse == NULL)
3085 return NULL;
3086 n_except--;
3087 }
3088
3089 finally = ast_for_suite(c, CHILD(n, nch - 1));
3090 if (finally == NULL)
3091 return NULL;
3092 n_except--;
3093 }
3094 else {
3095 /* we can assume it's an "else",
3096 otherwise it would have a type of except_clause */
3097 orelse = ast_for_suite(c, CHILD(n, nch - 1));
3098 if (orelse == NULL)
3099 return NULL;
3100 n_except--;
3101 }
3102 }
3103 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3104 ast_error(n, "malformed 'try' statement");
3105 return NULL;
3106 }
3107
3108 if (n_except > 0) {
3109 int i;
3110 stmt_ty except_st;
3111 /* process except statements to create a try ... except */
3112 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3113 if (handlers == NULL)
3114 return NULL;
3115
3116 for (i = 0; i < n_except; i++) {
3117 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3118 CHILD(n, 5 + i * 3));
3119 if (!e)
3120 return NULL;
3121 asdl_seq_SET(handlers, i, e);
3122 }
3123
3124 except_st = TryExcept(body, handlers, orelse, LINENO(n),
3125 n->n_col_offset, c->c_arena);
3126 if (!finally)
3127 return except_st;
3128
3129 /* if a 'finally' is present too, we nest the TryExcept within a
3130 TryFinally to emulate try ... except ... finally */
3131 body = asdl_seq_new(1, c->c_arena);
3132 if (body == NULL)
3133 return NULL;
3134 asdl_seq_SET(body, 0, except_st);
3135 }
3136
3137 /* must be a try ... finally (except clauses are in body, if any exist) */
3138 assert(finally != NULL);
3139 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3140 }
3141
3142 /* with_item: test ['as' expr] */
3143 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3144 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3145 {
3146 expr_ty context_expr, optional_vars = NULL;
3147
3148 REQ(n, with_item);
3149 context_expr = ast_for_expr(c, CHILD(n, 0));
3150 if (!context_expr)
3151 return NULL;
3152 if (NCH(n) == 3) {
3153 optional_vars = ast_for_expr(c, CHILD(n, 2));
3154
3155 if (!optional_vars) {
3156 return NULL;
3157 }
3158 if (!set_context(c, optional_vars, Store, n)) {
3159 return NULL;
3160 }
3161 }
3162
3163 return With(context_expr, optional_vars, content, LINENO(n),
3164 n->n_col_offset, c->c_arena);
3165 }
3166
3167 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3168 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3169 ast_for_with_stmt(struct compiling *c, const node *n)
3170 {
3171 int i;
3172 stmt_ty ret;
3173 asdl_seq *inner;
3174
3175 REQ(n, with_stmt);
3176
3177 /* process the with items inside-out */
3178 i = NCH(n) - 1;
3179 /* the suite of the innermost with item is the suite of the with stmt */
3180 inner = ast_for_suite(c, CHILD(n, i));
3181 if (!inner)
3182 return NULL;
3183
3184 for (;;) {
3185 i -= 2;
3186 ret = ast_for_with_item(c, CHILD(n, i), inner);
3187 if (!ret)
3188 return NULL;
3189 /* was this the last item? */
3190 if (i == 1)
3191 break;
3192 /* if not, wrap the result so far in a new sequence */
3193 inner = asdl_seq_new(1, c->c_arena);
3194 if (!inner)
3195 return NULL;
3196 asdl_seq_SET(inner, 0, ret);
3197 }
3198
3199 return ret;
3200 }
3201
3202 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3203 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3204 {
3205 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3206 PyObject *classname;
3207 asdl_seq *bases, *s;
3208
3209 REQ(n, classdef);
3210
3211 if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3212 return NULL;
3213
3214 if (NCH(n) == 4) {
3215 s = ast_for_suite(c, CHILD(n, 3));
3216 if (!s)
3217 return NULL;
3218 classname = NEW_IDENTIFIER(CHILD(n, 1));
3219 if (!classname)
3220 return NULL;
3221 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3222 n->n_col_offset, c->c_arena);
3223 }
3224 /* check for empty base list */
3225 if (TYPE(CHILD(n,3)) == RPAR) {
3226 s = ast_for_suite(c, CHILD(n,5));
3227 if (!s)
3228 return NULL;
3229 classname = NEW_IDENTIFIER(CHILD(n, 1));
3230 if (!classname)
3231 return NULL;
3232 return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3233 n->n_col_offset, c->c_arena);
3234 }
3235
3236 /* else handle the base class list */
3237 bases = ast_for_class_bases(c, CHILD(n, 3));
3238 if (!bases)
3239 return NULL;
3240
3241 s = ast_for_suite(c, CHILD(n, 6));
3242 if (!s)
3243 return NULL;
3244 classname = NEW_IDENTIFIER(CHILD(n, 1));
3245 if (!classname)
3246 return NULL;
3247 return ClassDef(classname, bases, s, decorator_seq,
3248 LINENO(n), n->n_col_offset, c->c_arena);
3249 }
3250
3251 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3252 ast_for_stmt(struct compiling *c, const node *n)
3253 {
3254 if (TYPE(n) == stmt) {
3255 assert(NCH(n) == 1);
3256 n = CHILD(n, 0);
3257 }
3258 if (TYPE(n) == simple_stmt) {
3259 assert(num_stmts(n) == 1);
3260 n = CHILD(n, 0);
3261 }
3262 if (TYPE(n) == small_stmt) {
3263 n = CHILD(n, 0);
3264 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
3265 | flow_stmt | import_stmt | global_stmt | exec_stmt
3266 | assert_stmt
3267 */
3268 switch (TYPE(n)) {
3269 case expr_stmt:
3270 return ast_for_expr_stmt(c, n);
3271 case print_stmt:
3272 return ast_for_print_stmt(c, n);
3273 case del_stmt:
3274 return ast_for_del_stmt(c, n);
3275 case pass_stmt:
3276 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3277 case flow_stmt:
3278 return ast_for_flow_stmt(c, n);
3279 case import_stmt:
3280 return ast_for_import_stmt(c, n);
3281 case global_stmt:
3282 return ast_for_global_stmt(c, n);
3283 case exec_stmt:
3284 return ast_for_exec_stmt(c, n);
3285 case assert_stmt:
3286 return ast_for_assert_stmt(c, n);
3287 default:
3288 PyErr_Format(PyExc_SystemError,
3289 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3290 TYPE(n), NCH(n));
3291 return NULL;
3292 }
3293 }
3294 else {
3295 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3296 | funcdef | classdef | decorated
3297 */
3298 node *ch = CHILD(n, 0);
3299 REQ(n, compound_stmt);
3300 switch (TYPE(ch)) {
3301 case if_stmt:
3302 return ast_for_if_stmt(c, ch);
3303 case while_stmt:
3304 return ast_for_while_stmt(c, ch);
3305 case for_stmt:
3306 return ast_for_for_stmt(c, ch);
3307 case try_stmt:
3308 return ast_for_try_stmt(c, ch);
3309 case with_stmt:
3310 return ast_for_with_stmt(c, ch);
3311 case funcdef:
3312 return ast_for_funcdef(c, ch, NULL);
3313 case classdef:
3314 return ast_for_classdef(c, ch, NULL);
3315 case decorated:
3316 return ast_for_decorated(c, ch);
3317 default:
3318 PyErr_Format(PyExc_SystemError,
3319 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3320 TYPE(n), NCH(n));
3321 return NULL;
3322 }
3323 }
3324 }
3325
3326 static PyObject *
parsenumber(struct compiling * c,const char * s)3327 parsenumber(struct compiling *c, const char *s)
3328 {
3329 const char *end;
3330 long x;
3331 double dx;
3332 #ifndef WITHOUT_COMPLEX
3333 Py_complex complex;
3334 int imflag;
3335 #endif
3336
3337 assert(s != NULL);
3338 errno = 0;
3339 end = s + strlen(s) - 1;
3340 #ifndef WITHOUT_COMPLEX
3341 imflag = *end == 'j' || *end == 'J';
3342 #endif
3343 if (*end == 'l' || *end == 'L')
3344 return PyLong_FromString((char *)s, (char **)0, 0);
3345 x = PyOS_strtol((char *)s, (char **)&end, 0);
3346 if (*end == '\0') {
3347 if (errno != 0)
3348 return PyLong_FromString((char *)s, (char **)0, 0);
3349 return PyInt_FromLong(x);
3350 }
3351 /* XXX Huge floats may silently fail */
3352 #ifndef WITHOUT_COMPLEX
3353 if (imflag) {
3354 complex.real = 0.;
3355 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3356 if (complex.imag == -1.0 && PyErr_Occurred())
3357 return NULL;
3358 return PyComplex_FromCComplex(complex);
3359 }
3360 else
3361 #endif
3362 {
3363 dx = PyOS_string_to_double(s, NULL, NULL);
3364 if (dx == -1.0 && PyErr_Occurred())
3365 return NULL;
3366 return PyFloat_FromDouble(dx);
3367 }
3368 }
3369
3370 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3371 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3372 {
3373 #ifndef Py_USING_UNICODE
3374 Py_FatalError("decode_utf8 should not be called in this build.");
3375 return NULL;
3376 #else
3377 PyObject *u, *v;
3378 char *s, *t;
3379 t = s = (char *)*sPtr;
3380 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3381 while (s < end && (*s & 0x80)) s++;
3382 *sPtr = s;
3383 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3384 if (u == NULL)
3385 return NULL;
3386 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3387 Py_DECREF(u);
3388 return v;
3389 #endif
3390 }
3391
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode literal (`len` bytes at `s`) into a
   unicode object, using the raw or the regular unicode-escape decoder
   depending on `rawmode`.

   When `encoding` is given and is not "iso-8859-1", the text is first
   rewritten into a temporary buffer in which every run of high-bit bytes
   is replaced by \UXXXXXXXX escapes, and a backslash directly followed by
   a high-bit byte is spelled \u005c.  Returns NULL with an exception set
   on failure; a literal too long for the temporary buffer raises
   OverflowError. */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v;
    PyObject *u = NULL;
    char *buf;
    char *p;
    const char *end;
    if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
        /* check for integer overflow; report it instead of returning
           NULL without an exception set */
        if (len > PY_SIZE_MAX / 6) {
            PyErr_SetString(PyExc_OverflowError,
                            "string to parse is too long");
            return NULL;
        }
        /* A 2-byte UTF-8 sequence may become "\U000000E4" (10 bytes), or 1:5;
           a backslash followed by such a sequence (3 bytes) may become
           "\u005c\U000000E4" (16 bytes), or ~1:6 */
        u = PyString_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyString_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                if (*s & 0x80) {
                    strcpy(p, "u005c");
                    p += 5;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                char *r;
                Py_ssize_t rn, i;
                w = decode_utf8(c, &s, end, "utf-32-be");
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                assert(rn % 4 == 0);
                /* one \UXXXXXXXX escape per 4-byte code unit */
                for (i = 0; i < rn; i += 4) {
                    sprintf(p, "\\U%02x%02x%02x%02x",
                            r[i + 0] & 0xFF,
                            r[i + 1] & 0xFF,
                            r[i + 2] & 0xFF,
                            r[i + 3] & 0xFF);
                    p += 10;
                }
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* decode from the rewritten buffer instead of the original text */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    Py_XDECREF(u);
    return v;
}
#endif
3456
3457 /* s is a Python string literal, including the bracketing quote characters,
3458 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3459 * parsestr parses it, and returns the decoded Python string object.
3460 */
3461 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3462 parsestr(struct compiling *c, const node *n, const char *s)
3463 {
3464 size_t len, i;
3465 int quote = Py_CHARMASK(*s);
3466 int rawmode = 0;
3467 int need_encoding;
3468 int unicode = c->c_future_unicode;
3469 int bytes = 0;
3470
3471 if (isalpha(quote) || quote == '_') {
3472 if (quote == 'u' || quote == 'U') {
3473 quote = *++s;
3474 unicode = 1;
3475 }
3476 if (quote == 'b' || quote == 'B') {
3477 quote = *++s;
3478 unicode = 0;
3479 bytes = 1;
3480 }
3481 if (quote == 'r' || quote == 'R') {
3482 quote = *++s;
3483 rawmode = 1;
3484 }
3485 }
3486 if (quote != '\'' && quote != '\"') {
3487 PyErr_BadInternalCall();
3488 return NULL;
3489 }
3490 s++;
3491 len = strlen(s);
3492 if (len > INT_MAX) {
3493 PyErr_SetString(PyExc_OverflowError,
3494 "string to parse is too long");
3495 return NULL;
3496 }
3497 if (s[--len] != quote) {
3498 PyErr_BadInternalCall();
3499 return NULL;
3500 }
3501 if (len >= 4 && s[0] == quote && s[1] == quote) {
3502 s += 2;
3503 len -= 2;
3504 if (s[--len] != quote || s[--len] != quote) {
3505 PyErr_BadInternalCall();
3506 return NULL;
3507 }
3508 }
3509 if (Py_Py3kWarningFlag && bytes) {
3510 for (i = 0; i < len; i++) {
3511 if ((unsigned char)s[i] > 127) {
3512 if (!ast_warn(c, n,
3513 "non-ascii bytes literals not supported in 3.x"))
3514 return NULL;
3515 break;
3516 }
3517 }
3518 }
3519 #ifdef Py_USING_UNICODE
3520 if (unicode || Py_UnicodeFlag) {
3521 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3522 }
3523 #endif
3524 need_encoding = (c->c_encoding != NULL &&
3525 strcmp(c->c_encoding, "utf-8") != 0 &&
3526 strcmp(c->c_encoding, "iso-8859-1") != 0);
3527 if (rawmode || strchr(s, '\\') == NULL) {
3528 if (need_encoding) {
3529 #ifndef Py_USING_UNICODE
3530 /* This should not happen - we never see any other
3531 encoding. */
3532 Py_FatalError(
3533 "cannot deal with encodings in this build.");
3534 #else
3535 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3536 if (u == NULL)
3537 return NULL;
3538 v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3539 Py_DECREF(u);
3540 return v;
3541 #endif
3542 } else {
3543 return PyString_FromStringAndSize(s, len);
3544 }
3545 }
3546
3547 return PyString_DecodeEscape(s, len, NULL, unicode,
3548 need_encoding ? c->c_encoding : NULL);
3549 }
3550
3551 /* Build a Python string object out of a STRING atom. This takes care of
3552 * compile-time literal catenation, calling parsestr() on each piece, and
3553 * pasting the intermediate results together.
3554 */
3555 static PyObject *
parsestrplus(struct compiling * c,const node * n)3556 parsestrplus(struct compiling *c, const node *n)
3557 {
3558 PyObject *v;
3559 int i;
3560 REQ(CHILD(n, 0), STRING);
3561 if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3562 /* String literal concatenation */
3563 for (i = 1; i < NCH(n); i++) {
3564 PyObject *s;
3565 s = parsestr(c, n, STR(CHILD(n, i)));
3566 if (s == NULL)
3567 goto onError;
3568 if (PyString_Check(v) && PyString_Check(s)) {
3569 PyString_ConcatAndDel(&v, s);
3570 if (v == NULL)
3571 goto onError;
3572 }
3573 #ifdef Py_USING_UNICODE
3574 else {
3575 PyObject *temp = PyUnicode_Concat(v, s);
3576 Py_DECREF(s);
3577 Py_DECREF(v);
3578 v = temp;
3579 if (v == NULL)
3580 goto onError;
3581 }
3582 #endif
3583 }
3584 }
3585 return v;
3586
3587 onError:
3588 Py_XDECREF(v);
3589 return NULL;
3590 }
3591