1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST).  The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
15 
16 #include <assert.h>
17 
18 /* Data structure used internally */
/* Per-conversion state handed to every ast_for_* helper.  PyAST_FromNode()
   fills in one instance on its stack before walking the tree. */
struct compiling {
    char *c_encoding; /* source encoding: "utf-8", the string of the
                         encoding_decl node, or NULL when neither applies */
    int c_future_unicode; /* __future__ unicode literals flag */
    PyArena *c_arena; /* arena for allocating memory */
    const char *c_filename; /* filename (used when issuing warnings) */
};
25 
/* Forward declarations of file-local helpers, so that they can be called
   before their definitions appear. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *, const node *);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
static expr_ty ast_for_testlist_comp(struct compiling *, const node *);

/* Note different signature for ast_for_call: it takes an extra expr_ty
   argument (the decorator code passes the already-built callee there). */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);

/* Literal parsing helpers. */
static PyObject *parsenumber(struct compiling *, const char *);
static PyObject *parsestr(struct compiling *, const node *n, const char *);
static PyObject *parsestrplus(struct compiling *, const node *n);
42 
/* Line number recorded on parse-tree node n.  Only defined here when an
   earlier header has not already supplied LINENO. */
#ifndef LINENO
#define LINENO(n)       ((n)->n_lineno)
#endif

/* Integer tags; not referenced in this part of the file -- presumably they
   select which kind of comprehension is being built (TODO confirm). */
#define COMP_GENEXP 0
#define COMP_SETCOMP  1
49 
50 static identifier
new_identifier(const char * n,PyArena * arena)51 new_identifier(const char* n, PyArena *arena) {
52     PyObject* id = PyString_InternFromString(n);
53     if (id != NULL)
54         PyArena_AddPyObject(arena, id);
55     return id;
56 }
57 
58 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
59 
60 /* This routine provides an invalid object for the syntax error.
61    The outermost routine must unpack this error and create the
62    proper object.  We do this so that we don't have to pass
63    the filename to everything function.
64 
65    XXX Maybe we should just pass the filename...
66 */
67 
68 static int
ast_error(const node * n,const char * errstr)69 ast_error(const node *n, const char *errstr)
70 {
71     PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
72     if (!u)
73         return 0;
74     PyErr_SetObject(PyExc_SyntaxError, u);
75     Py_DECREF(u);
76     return 0;
77 }
78 
79 static void
ast_error_finish(const char * filename)80 ast_error_finish(const char *filename)
81 {
82     PyObject *type, *value, *tback, *errstr, *loc, *tmp;
83     long lineno;
84 
85     assert(PyErr_Occurred());
86     if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
87         return;
88 
89     PyErr_Fetch(&type, &value, &tback);
90     errstr = PyTuple_GetItem(value, 0);
91     if (!errstr)
92         return;
93     Py_INCREF(errstr);
94     lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
95     if (lineno == -1) {
96         Py_DECREF(errstr);
97         return;
98     }
99     Py_DECREF(value);
100 
101     loc = PyErr_ProgramText(filename, lineno);
102     if (!loc) {
103         Py_INCREF(Py_None);
104         loc = Py_None;
105     }
106     tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
107     Py_DECREF(loc);
108     if (!tmp) {
109         Py_DECREF(errstr);
110         return;
111     }
112     value = PyTuple_Pack(2, errstr, tmp);
113     Py_DECREF(errstr);
114     Py_DECREF(tmp);
115     if (!value)
116         return;
117     PyErr_Restore(type, value, tback);
118 }
119 
120 static int
ast_warn(struct compiling * c,const node * n,char * msg)121 ast_warn(struct compiling *c, const node *n, char *msg)
122 {
123     if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
124                            NULL, NULL) < 0) {
125         /* if -Werr, change it to a SyntaxError */
126         if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
127             ast_error(n, msg);
128         return 0;
129     }
130     return 1;
131 }
132 
133 static int
forbidden_check(struct compiling * c,const node * n,const char * x)134 forbidden_check(struct compiling *c, const node *n, const char *x)
135 {
136     if (!strcmp(x, "None"))
137         return ast_error(n, "cannot assign to None");
138     if (!strcmp(x, "__debug__"))
139         return ast_error(n, "cannot assign to __debug__");
140     if (Py_Py3kWarningFlag) {
141         if (!(strcmp(x, "True") && strcmp(x, "False")) &&
142             !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
143             return 0;
144         if (!strcmp(x, "nonlocal") &&
145             !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
146             return 0;
147     }
148     return 1;
149 }
150 
151 /* num_stmts() returns number of contained statements.
152 
153    Use this routine to determine how big a sequence is needed for
154    the statements in a parse tree.  Its raison d'etre is this bit of
155    grammar:
156 
157    stmt: simple_stmt | compound_stmt
158    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
159 
160    A simple_stmt can contain multiple small_stmt elements joined
161    by semicolons.  If the arg is a simple_stmt, the number of
162    small_stmt elements is returned.
163 */
164 
165 static int
num_stmts(const node * n)166 num_stmts(const node *n)
167 {
168     int i, l;
169     node *ch;
170 
171     switch (TYPE(n)) {
172         case single_input:
173             if (TYPE(CHILD(n, 0)) == NEWLINE)
174                 return 0;
175             else
176                 return num_stmts(CHILD(n, 0));
177         case file_input:
178             l = 0;
179             for (i = 0; i < NCH(n); i++) {
180                 ch = CHILD(n, i);
181                 if (TYPE(ch) == stmt)
182                     l += num_stmts(ch);
183             }
184             return l;
185         case stmt:
186             return num_stmts(CHILD(n, 0));
187         case compound_stmt:
188             return 1;
189         case simple_stmt:
190             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
191         case suite:
192             if (NCH(n) == 1)
193                 return num_stmts(CHILD(n, 0));
194             else {
195                 l = 0;
196                 for (i = 2; i < (NCH(n) - 1); i++)
197                     l += num_stmts(CHILD(n, i));
198                 return l;
199             }
200         default: {
201             char buf[128];
202 
203             sprintf(buf, "Non-statement found: %d %d",
204                     TYPE(n), NCH(n));
205             Py_FatalError(buf);
206         }
207     }
208     assert(0);
209     return 0;
210 }
211 
212 /* Transform the CST rooted at node * to the appropriate AST
213 */
214 
215 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)216 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
217                PyArena *arena)
218 {
219     int i, j, k, num;
220     asdl_seq *stmts = NULL;
221     stmt_ty s;
222     node *ch;
223     struct compiling c;
224 
225     if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
226         c.c_encoding = "utf-8";
227         if (TYPE(n) == encoding_decl) {
228             ast_error(n, "encoding declaration in Unicode string");
229             goto error;
230         }
231     } else if (TYPE(n) == encoding_decl) {
232         c.c_encoding = STR(n);
233         n = CHILD(n, 0);
234     } else {
235         c.c_encoding = NULL;
236     }
237     c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
238     c.c_arena = arena;
239     c.c_filename = filename;
240 
241     k = 0;
242     switch (TYPE(n)) {
243         case file_input:
244             stmts = asdl_seq_new(num_stmts(n), arena);
245             if (!stmts)
246                 return NULL;
247             for (i = 0; i < NCH(n) - 1; i++) {
248                 ch = CHILD(n, i);
249                 if (TYPE(ch) == NEWLINE)
250                     continue;
251                 REQ(ch, stmt);
252                 num = num_stmts(ch);
253                 if (num == 1) {
254                     s = ast_for_stmt(&c, ch);
255                     if (!s)
256                         goto error;
257                     asdl_seq_SET(stmts, k++, s);
258                 }
259                 else {
260                     ch = CHILD(ch, 0);
261                     REQ(ch, simple_stmt);
262                     for (j = 0; j < num; j++) {
263                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
264                         if (!s)
265                             goto error;
266                         asdl_seq_SET(stmts, k++, s);
267                     }
268                 }
269             }
270             return Module(stmts, arena);
271         case eval_input: {
272             expr_ty testlist_ast;
273 
274             /* XXX Why not comp_for here? */
275             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
276             if (!testlist_ast)
277                 goto error;
278             return Expression(testlist_ast, arena);
279         }
280         case single_input:
281             if (TYPE(CHILD(n, 0)) == NEWLINE) {
282                 stmts = asdl_seq_new(1, arena);
283                 if (!stmts)
284                     goto error;
285                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
286                                             arena));
287                 if (!asdl_seq_GET(stmts, 0))
288                     goto error;
289                 return Interactive(stmts, arena);
290             }
291             else {
292                 n = CHILD(n, 0);
293                 num = num_stmts(n);
294                 stmts = asdl_seq_new(num, arena);
295                 if (!stmts)
296                     goto error;
297                 if (num == 1) {
298                     s = ast_for_stmt(&c, n);
299                     if (!s)
300                         goto error;
301                     asdl_seq_SET(stmts, 0, s);
302                 }
303                 else {
304                     /* Only a simple_stmt can contain multiple statements. */
305                     REQ(n, simple_stmt);
306                     for (i = 0; i < NCH(n); i += 2) {
307                         if (TYPE(CHILD(n, i)) == NEWLINE)
308                             break;
309                         s = ast_for_stmt(&c, CHILD(n, i));
310                         if (!s)
311                             goto error;
312                         asdl_seq_SET(stmts, i / 2, s);
313                     }
314                 }
315 
316                 return Interactive(stmts, arena);
317             }
318         default:
319             PyErr_Format(PyExc_SystemError,
320                          "invalid node %d for PyAST_FromNode", TYPE(n));
321             goto error;
322     }
323  error:
324     ast_error_finish(filename);
325     return NULL;
326 }
327 
328 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
329 */
330 
331 static operator_ty
get_operator(const node * n)332 get_operator(const node *n)
333 {
334     switch (TYPE(n)) {
335         case VBAR:
336             return BitOr;
337         case CIRCUMFLEX:
338             return BitXor;
339         case AMPER:
340             return BitAnd;
341         case LEFTSHIFT:
342             return LShift;
343         case RIGHTSHIFT:
344             return RShift;
345         case PLUS:
346             return Add;
347         case MINUS:
348             return Sub;
349         case STAR:
350             return Mult;
351         case SLASH:
352             return Div;
353         case DOUBLESLASH:
354             return FloorDiv;
355         case PERCENT:
356             return Mod;
357         default:
358             return (operator_ty)0;
359     }
360 }
361 
362 /* Set the context ctx for expr_ty e, recursively traversing e.
363 
364    Only sets context for expr kinds that "can appear in assignment context"
365    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
366    an appropriate syntax error and returns false.
367 */
368 
369 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)370 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
371 {
372     asdl_seq *s = NULL;
373     /* If a particular expression type can't be used for assign / delete,
374        set expr_name to its name and an error message will be generated.
375     */
376     const char* expr_name = NULL;
377 
378     /* The ast defines augmented store and load contexts, but the
379        implementation here doesn't actually use them.  The code may be
380        a little more complex than necessary as a result.  It also means
381        that expressions in an augmented assignment have a Store context.
382        Consider restructuring so that augmented assignment uses
383        set_context(), too.
384     */
385     assert(ctx != AugStore && ctx != AugLoad);
386 
387     switch (e->kind) {
388         case Attribute_kind:
389             if (ctx == Store && !forbidden_check(c, n,
390                                 PyBytes_AS_STRING(e->v.Attribute.attr)))
391                     return 0;
392             e->v.Attribute.ctx = ctx;
393             break;
394         case Subscript_kind:
395             e->v.Subscript.ctx = ctx;
396             break;
397         case Name_kind:
398             if (ctx == Store && !forbidden_check(c, n,
399                                 PyBytes_AS_STRING(e->v.Name.id)))
400                     return 0;
401             e->v.Name.ctx = ctx;
402             break;
403         case List_kind:
404             e->v.List.ctx = ctx;
405             s = e->v.List.elts;
406             break;
407         case Tuple_kind:
408             if (asdl_seq_LEN(e->v.Tuple.elts))  {
409                 e->v.Tuple.ctx = ctx;
410                 s = e->v.Tuple.elts;
411             }
412             else {
413                 expr_name = "()";
414             }
415             break;
416         case Lambda_kind:
417             expr_name = "lambda";
418             break;
419         case Call_kind:
420             expr_name = "function call";
421             break;
422         case BoolOp_kind:
423         case BinOp_kind:
424         case UnaryOp_kind:
425             expr_name = "operator";
426             break;
427         case GeneratorExp_kind:
428             expr_name = "generator expression";
429             break;
430         case Yield_kind:
431             expr_name = "yield expression";
432             break;
433         case ListComp_kind:
434             expr_name = "list comprehension";
435             break;
436         case SetComp_kind:
437             expr_name = "set comprehension";
438             break;
439         case DictComp_kind:
440             expr_name = "dict comprehension";
441             break;
442         case Dict_kind:
443         case Set_kind:
444         case Num_kind:
445         case Str_kind:
446             expr_name = "literal";
447             break;
448         case Compare_kind:
449             expr_name = "comparison";
450             break;
451         case Repr_kind:
452             expr_name = "repr";
453             break;
454         case IfExp_kind:
455             expr_name = "conditional expression";
456             break;
457         default:
458             PyErr_Format(PyExc_SystemError,
459                          "unexpected expression in assignment %d (line %d)",
460                          e->kind, e->lineno);
461             return 0;
462     }
463     /* Check for error string set by switch */
464     if (expr_name) {
465         char buf[300];
466         PyOS_snprintf(buf, sizeof(buf),
467                       "can't %s %s",
468                       ctx == Store ? "assign to" : "delete",
469                       expr_name);
470         return ast_error(n, buf);
471     }
472 
473     /* If the LHS is a list or tuple, we need to set the assignment
474        context for all the contained elements.
475     */
476     if (s) {
477         int i;
478 
479         for (i = 0; i < asdl_seq_LEN(s); i++) {
480             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
481                 return 0;
482         }
483     }
484     return 1;
485 }
486 
487 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)488 ast_for_augassign(struct compiling *c, const node *n)
489 {
490     REQ(n, augassign);
491     n = CHILD(n, 0);
492     switch (STR(n)[0]) {
493         case '+':
494             return Add;
495         case '-':
496             return Sub;
497         case '/':
498             if (STR(n)[1] == '/')
499                 return FloorDiv;
500             else
501                 return Div;
502         case '%':
503             return Mod;
504         case '<':
505             return LShift;
506         case '>':
507             return RShift;
508         case '&':
509             return BitAnd;
510         case '^':
511             return BitXor;
512         case '|':
513             return BitOr;
514         case '*':
515             if (STR(n)[1] == '*')
516                 return Pow;
517             else
518                 return Mult;
519         default:
520             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
521             return (operator_ty)0;
522     }
523 }
524 
525 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)526 ast_for_comp_op(struct compiling *c, const node *n)
527 {
528     /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
529                |'is' 'not'
530     */
531     REQ(n, comp_op);
532     if (NCH(n) == 1) {
533         n = CHILD(n, 0);
534         switch (TYPE(n)) {
535             case LESS:
536                 return Lt;
537             case GREATER:
538                 return Gt;
539             case EQEQUAL:                       /* == */
540                 return Eq;
541             case LESSEQUAL:
542                 return LtE;
543             case GREATEREQUAL:
544                 return GtE;
545             case NOTEQUAL:
546                 return NotEq;
547             case NAME:
548                 if (strcmp(STR(n), "in") == 0)
549                     return In;
550                 if (strcmp(STR(n), "is") == 0)
551                     return Is;
552             default:
553                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
554                              STR(n));
555                 return (cmpop_ty)0;
556         }
557     }
558     else if (NCH(n) == 2) {
559         /* handle "not in" and "is not" */
560         switch (TYPE(CHILD(n, 0))) {
561             case NAME:
562                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
563                     return NotIn;
564                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
565                     return IsNot;
566             default:
567                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
568                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
569                 return (cmpop_ty)0;
570         }
571     }
572     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
573                  NCH(n));
574     return (cmpop_ty)0;
575 }
576 
577 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)578 seq_for_testlist(struct compiling *c, const node *n)
579 {
580     /* testlist: test (',' test)* [','] */
581     asdl_seq *seq;
582     expr_ty expression;
583     int i;
584     assert(TYPE(n) == testlist ||
585            TYPE(n) == listmaker ||
586            TYPE(n) == testlist_comp ||
587            TYPE(n) == testlist_safe ||
588            TYPE(n) == testlist1);
589 
590     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
591     if (!seq)
592         return NULL;
593 
594     for (i = 0; i < NCH(n); i += 2) {
595         assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
596 
597         expression = ast_for_expr(c, CHILD(n, i));
598         if (!expression)
599             return NULL;
600 
601         assert(i / 2 < seq->size);
602         asdl_seq_SET(seq, i / 2, expression);
603     }
604     return seq;
605 }
606 
607 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)608 compiler_complex_args(struct compiling *c, const node *n)
609 {
610     int i, len = (NCH(n) + 1) / 2;
611     expr_ty result;
612     asdl_seq *args = asdl_seq_new(len, c->c_arena);
613     if (!args)
614         return NULL;
615 
616     /* fpdef: NAME | '(' fplist ')'
617        fplist: fpdef (',' fpdef)* [',']
618     */
619     REQ(n, fplist);
620     for (i = 0; i < len; i++) {
621         PyObject *arg_id;
622         const node *fpdef_node = CHILD(n, 2*i);
623         const node *child;
624         expr_ty arg;
625 set_name:
626         /* fpdef_node is either a NAME or an fplist */
627         child = CHILD(fpdef_node, 0);
628         if (TYPE(child) == NAME) {
629             if (!forbidden_check(c, n, STR(child)))
630                 return NULL;
631             arg_id = NEW_IDENTIFIER(child);
632             if (!arg_id)
633                 return NULL;
634             arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
635                        c->c_arena);
636         }
637         else {
638             assert(TYPE(fpdef_node) == fpdef);
639             /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
640             child = CHILD(fpdef_node, 1);
641             assert(TYPE(child) == fplist);
642             /* NCH == 1 means we have (x), we need to elide the extra parens */
643             if (NCH(child) == 1) {
644                 fpdef_node = CHILD(child, 0);
645                 assert(TYPE(fpdef_node) == fpdef);
646                 goto set_name;
647             }
648             arg = compiler_complex_args(c, child);
649         }
650         asdl_seq_SET(args, i, arg);
651     }
652 
653     result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
654     if (!set_context(c, result, Store, n))
655         return NULL;
656     return result;
657 }
658 
659 
/* Create AST for argument list.

   n is either a `parameters` node or a `varargslist` node.  A `parameters`
   node with only two children (an empty "()") yields an arguments object
   whose fields are all NULL.  Otherwise a first pass counts the fpdef and
   '=' children to size the args and defaults sequences, and a second pass
   fills them in, recording the '*' and '**' names along the way.

   Returns NULL on failure.
*/

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* parameters: '(' [varargslist] ')'
       varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
            | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
    */
    int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
    asdl_seq *args, *defaults;
    identifier vararg = NULL, kwarg = NULL;
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, c->c_arena);
        /* Step inside the parentheses to the varargslist. */
        n = CHILD(n, 1);
    }
    REQ(n, varargslist);

    /* first count the number of normal args & defaults */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == fpdef)
            n_args++;
        if (TYPE(ch) == EQUAL)
            n_defaults++;
    }
    /* A sequence is only allocated when it will have at least one entry;
       otherwise the corresponding pointer stays NULL. */
    args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
    if (!args && n_args)
        return NULL;
    defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
    if (!defaults && n_defaults)
        return NULL;

    /* fpdef: NAME | '(' fplist ')'
       fplist: fpdef (',' fpdef)* [',']
    */
    i = 0;
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case fpdef: {
                /* complex_args: a tuple-unpacking parameter was seen;
                   parenthesized: a "(x)"-style wrapper was peeled off.
                   Both persist across the jumps back to handle_fpdef. */
                int complex_args = 0, parenthesized = 0;
            handle_fpdef:
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(defaults != NULL);
                    asdl_seq_SET(defaults, j++, expression);
                    /* Skip past the '=' and the default expression. */
                    i += 2;
                    found_default = 1;
                }
                else if (found_default) {
                    /* def f((x)=4): pass should raise an error.
                       def f((x, (y))): pass will just incur the tuple unpacking warning. */
                    if (parenthesized && !complex_args) {
                        ast_error(n, "parenthesized arg with default");
                        return NULL;
                    }
                    ast_error(n,
                             "non-default argument follows default argument");
                    return NULL;
                }
                /* Three children means the '(' fplist ')' form. */
                if (NCH(ch) == 3) {
                    ch = CHILD(ch, 1);
                    /* def foo((x)): is not complex, special case. */
                    if (NCH(ch) != 1) {
                        /* We have complex arguments, setup for unpacking. */
                        if (Py_Py3kWarningFlag && !ast_warn(c, ch,
                            "tuple parameter unpacking has been removed in 3.x"))
                            return NULL;
                        complex_args = 1;
                        asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
                        /* The slot just written is checked for failure. */
                        if (!asdl_seq_GET(args, k-1))
                                return NULL;
                    } else {
                        /* def foo((x)): setup for checking NAME below. */
                        /* Loop because there can be many parens and tuple
                           unpacking mixed in. */
                        parenthesized = 1;
                        ch = CHILD(ch, 0);
                        assert(TYPE(ch) == fpdef);
                        goto handle_fpdef;
                    }
                }
                /* Plain-name parameter: becomes a Name in Param context. */
                if (TYPE(CHILD(ch, 0)) == NAME) {
                    PyObject *id;
                    expr_ty name;
                    if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
                        return NULL;
                    id = NEW_IDENTIFIER(CHILD(ch, 0));
                    if (!id)
                        return NULL;
                    name = Name(id, Param, LINENO(ch), ch->n_col_offset,
                                c->c_arena);
                    if (!name)
                        return NULL;
                    asdl_seq_SET(args, k++, name);

                }
                i += 2; /* the name and the comma */
                if (parenthesized && Py_Py3kWarningFlag &&
                    !ast_warn(c, ch, "parenthesized argument names "
                              "are invalid in 3.x"))
                    return NULL;

                break;
            }
            case STAR:
                /* The child after '*' names the vararg. */
                if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
                    return NULL;
                vararg = NEW_IDENTIFIER(CHILD(n, i+1));
                if (!vararg)
                    return NULL;
                /* Advance three children ('*', the NAME, and the one after
                   it). */
                i += 3;
                break;
            case DOUBLESTAR:
                /* The child after '**' names the kwarg. */
                if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
                    return NULL;
                kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
                if (!kwarg)
                    return NULL;
                i += 3;
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }

    return arguments(args, vararg, kwarg, defaults, c->c_arena);
}
802 
803 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)804 ast_for_dotted_name(struct compiling *c, const node *n)
805 {
806     expr_ty e;
807     identifier id;
808     int lineno, col_offset;
809     int i;
810 
811     REQ(n, dotted_name);
812 
813     lineno = LINENO(n);
814     col_offset = n->n_col_offset;
815 
816     id = NEW_IDENTIFIER(CHILD(n, 0));
817     if (!id)
818         return NULL;
819     e = Name(id, Load, lineno, col_offset, c->c_arena);
820     if (!e)
821         return NULL;
822 
823     for (i = 2; i < NCH(n); i+=2) {
824         id = NEW_IDENTIFIER(CHILD(n, i));
825         if (!id)
826             return NULL;
827         e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
828         if (!e)
829             return NULL;
830     }
831 
832     return e;
833 }
834 
835 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)836 ast_for_decorator(struct compiling *c, const node *n)
837 {
838     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
839     expr_ty d = NULL;
840     expr_ty name_expr;
841 
842     REQ(n, decorator);
843     REQ(CHILD(n, 0), AT);
844     REQ(RCHILD(n, -1), NEWLINE);
845 
846     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
847     if (!name_expr)
848         return NULL;
849 
850     if (NCH(n) == 3) { /* No arguments */
851         d = name_expr;
852         name_expr = NULL;
853     }
854     else if (NCH(n) == 5) { /* Call with no arguments */
855         d = Call(name_expr, NULL, NULL, NULL, NULL,
856                  name_expr->lineno, name_expr->col_offset,
857                  c->c_arena);
858         if (!d)
859             return NULL;
860         name_expr = NULL;
861     }
862     else {
863         d = ast_for_call(c, CHILD(n, 3), name_expr);
864         if (!d)
865             return NULL;
866         name_expr = NULL;
867     }
868 
869     return d;
870 }
871 
872 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)873 ast_for_decorators(struct compiling *c, const node *n)
874 {
875     asdl_seq* decorator_seq;
876     expr_ty d;
877     int i;
878 
879     REQ(n, decorators);
880     decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
881     if (!decorator_seq)
882         return NULL;
883 
884     for (i = 0; i < NCH(n); i++) {
885         d = ast_for_decorator(c, CHILD(n, i));
886         if (!d)
887             return NULL;
888         asdl_seq_SET(decorator_seq, i, d);
889     }
890     return decorator_seq;
891 }
892 
893 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)894 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
895 {
896     /* funcdef: 'def' NAME parameters ':' suite */
897     identifier name;
898     arguments_ty args;
899     asdl_seq *body;
900     int name_i = 1;
901 
902     REQ(n, funcdef);
903 
904     name = NEW_IDENTIFIER(CHILD(n, name_i));
905     if (!name)
906         return NULL;
907     else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
908         return NULL;
909     args = ast_for_arguments(c, CHILD(n, name_i + 1));
910     if (!args)
911         return NULL;
912     body = ast_for_suite(c, CHILD(n, name_i + 3));
913     if (!body)
914         return NULL;
915 
916     return FunctionDef(name, args, body, decorator_seq, LINENO(n),
917                        n->n_col_offset, c->c_arena);
918 }
919 
920 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)921 ast_for_decorated(struct compiling *c, const node *n)
922 {
923     /* decorated: decorators (classdef | funcdef) */
924     stmt_ty thing = NULL;
925     asdl_seq *decorator_seq = NULL;
926 
927     REQ(n, decorated);
928 
929     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
930     if (!decorator_seq)
931       return NULL;
932 
933     assert(TYPE(CHILD(n, 1)) == funcdef ||
934            TYPE(CHILD(n, 1)) == classdef);
935 
936     if (TYPE(CHILD(n, 1)) == funcdef) {
937       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
938     } else if (TYPE(CHILD(n, 1)) == classdef) {
939       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
940     }
941     /* we count the decorators in when talking about the class' or
942        function's line number */
943     if (thing) {
944         thing->lineno = LINENO(n);
945         thing->col_offset = n->n_col_offset;
946     }
947     return thing;
948 }
949 
950 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)951 ast_for_lambdef(struct compiling *c, const node *n)
952 {
953     /* lambdef: 'lambda' [varargslist] ':' test */
954     arguments_ty args;
955     expr_ty expression;
956 
957     if (NCH(n) == 3) {
958         args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
959         if (!args)
960             return NULL;
961         expression = ast_for_expr(c, CHILD(n, 2));
962         if (!expression)
963             return NULL;
964     }
965     else {
966         args = ast_for_arguments(c, CHILD(n, 1));
967         if (!args)
968             return NULL;
969         expression = ast_for_expr(c, CHILD(n, 3));
970         if (!expression)
971             return NULL;
972     }
973 
974     return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
975 }
976 
977 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)978 ast_for_ifexpr(struct compiling *c, const node *n)
979 {
980     /* test: or_test 'if' or_test 'else' test */
981     expr_ty expression, body, orelse;
982 
983     assert(NCH(n) == 5);
984     body = ast_for_expr(c, CHILD(n, 0));
985     if (!body)
986         return NULL;
987     expression = ast_for_expr(c, CHILD(n, 2));
988     if (!expression)
989         return NULL;
990     orelse = ast_for_expr(c, CHILD(n, 4));
991     if (!orelse)
992         return NULL;
993     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
994                  c->c_arena);
995 }
996 
997 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
998    so there is only a single version.  Possibly for loops can also re-use
999    the code.
1000 */
1001 
1002 /* Count the number of 'for' loop in a list comprehension.
1003 
1004    Helper for ast_for_listcomp().
1005 */
1006 
1007 static int
count_list_fors(struct compiling * c,const node * n)1008 count_list_fors(struct compiling *c, const node *n)
1009 {
1010     int n_fors = 0;
1011     node *ch = CHILD(n, 1);
1012 
1013  count_list_for:
1014     n_fors++;
1015     REQ(ch, list_for);
1016     if (NCH(ch) == 5)
1017         ch = CHILD(ch, 4);
1018     else
1019         return n_fors;
1020  count_list_iter:
1021     REQ(ch, list_iter);
1022     ch = CHILD(ch, 0);
1023     if (TYPE(ch) == list_for)
1024         goto count_list_for;
1025     else if (TYPE(ch) == list_if) {
1026         if (NCH(ch) == 3) {
1027             ch = CHILD(ch, 2);
1028             goto count_list_iter;
1029         }
1030         else
1031             return n_fors;
1032     }
1033 
1034     /* Should never be reached */
1035     PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1036     return -1;
1037 }
1038 
1039 /* Count the number of 'if' statements in a list comprehension.
1040 
1041    Helper for ast_for_listcomp().
1042 */
1043 
1044 static int
count_list_ifs(struct compiling * c,const node * n)1045 count_list_ifs(struct compiling *c, const node *n)
1046 {
1047     int n_ifs = 0;
1048 
1049  count_list_iter:
1050     REQ(n, list_iter);
1051     if (TYPE(CHILD(n, 0)) == list_for)
1052         return n_ifs;
1053     n = CHILD(n, 0);
1054     REQ(n, list_if);
1055     n_ifs++;
1056     if (NCH(n) == 2)
1057         return n_ifs;
1058     n = CHILD(n, 2);
1059     goto count_list_iter;
1060 }
1061 
1062 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1063 ast_for_listcomp(struct compiling *c, const node *n)
1064 {
1065     /* listmaker: test ( list_for | (',' test)* [','] )
1066        list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1067        list_iter: list_for | list_if
1068        list_if: 'if' test [list_iter]
1069        testlist_safe: test [(',' test)+ [',']]
1070     */
1071     expr_ty elt, first;
1072     asdl_seq *listcomps;
1073     int i, n_fors;
1074     node *ch;
1075 
1076     REQ(n, listmaker);
1077     assert(NCH(n) > 1);
1078 
1079     elt = ast_for_expr(c, CHILD(n, 0));
1080     if (!elt)
1081         return NULL;
1082 
1083     n_fors = count_list_fors(c, n);
1084     if (n_fors == -1)
1085         return NULL;
1086 
1087     listcomps = asdl_seq_new(n_fors, c->c_arena);
1088     if (!listcomps)
1089         return NULL;
1090 
1091     ch = CHILD(n, 1);
1092     for (i = 0; i < n_fors; i++) {
1093         comprehension_ty lc;
1094         asdl_seq *t;
1095         expr_ty expression;
1096         node *for_ch;
1097 
1098         REQ(ch, list_for);
1099 
1100         for_ch = CHILD(ch, 1);
1101         t = ast_for_exprlist(c, for_ch, Store);
1102         if (!t)
1103             return NULL;
1104         expression = ast_for_testlist(c, CHILD(ch, 3));
1105         if (!expression)
1106             return NULL;
1107 
1108         /* Check the # of children rather than the length of t, since
1109            [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1110         */
1111         first = (expr_ty)asdl_seq_GET(t, 0);
1112         if (NCH(for_ch) == 1)
1113             lc = comprehension(first, expression, NULL, c->c_arena);
1114         else
1115             lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1116                                      c->c_arena),
1117                                expression, NULL, c->c_arena);
1118         if (!lc)
1119             return NULL;
1120 
1121         if (NCH(ch) == 5) {
1122             int j, n_ifs;
1123             asdl_seq *ifs;
1124             expr_ty list_for_expr;
1125 
1126             ch = CHILD(ch, 4);
1127             n_ifs = count_list_ifs(c, ch);
1128             if (n_ifs == -1)
1129                 return NULL;
1130 
1131             ifs = asdl_seq_new(n_ifs, c->c_arena);
1132             if (!ifs)
1133                 return NULL;
1134 
1135             for (j = 0; j < n_ifs; j++) {
1136                 REQ(ch, list_iter);
1137                 ch = CHILD(ch, 0);
1138                 REQ(ch, list_if);
1139 
1140                 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1141                 if (!list_for_expr)
1142                     return NULL;
1143 
1144                 asdl_seq_SET(ifs, j, list_for_expr);
1145                 if (NCH(ch) == 3)
1146                     ch = CHILD(ch, 2);
1147             }
1148             /* on exit, must guarantee that ch is a list_for */
1149             if (TYPE(ch) == list_iter)
1150                 ch = CHILD(ch, 0);
1151             lc->ifs = ifs;
1152         }
1153         asdl_seq_SET(listcomps, i, lc);
1154     }
1155 
1156     return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1157 }
1158 
1159 /*
1160    Count the number of 'for' loops in a comprehension.
1161 
1162    Helper for ast_for_comprehension().
1163 */
1164 
1165 static int
count_comp_fors(struct compiling * c,const node * n)1166 count_comp_fors(struct compiling *c, const node *n)
1167 {
1168     int n_fors = 0;
1169 
1170   count_comp_for:
1171     n_fors++;
1172     REQ(n, comp_for);
1173     if (NCH(n) == 5)
1174         n = CHILD(n, 4);
1175     else
1176         return n_fors;
1177   count_comp_iter:
1178     REQ(n, comp_iter);
1179     n = CHILD(n, 0);
1180     if (TYPE(n) == comp_for)
1181         goto count_comp_for;
1182     else if (TYPE(n) == comp_if) {
1183         if (NCH(n) == 3) {
1184             n = CHILD(n, 2);
1185             goto count_comp_iter;
1186         }
1187         else
1188             return n_fors;
1189     }
1190 
1191     /* Should never be reached */
1192     PyErr_SetString(PyExc_SystemError,
1193                     "logic error in count_comp_fors");
1194     return -1;
1195 }
1196 
1197 /* Count the number of 'if' statements in a comprehension.
1198 
1199    Helper for ast_for_comprehension().
1200 */
1201 
1202 static int
count_comp_ifs(struct compiling * c,const node * n)1203 count_comp_ifs(struct compiling *c, const node *n)
1204 {
1205     int n_ifs = 0;
1206 
1207     while (1) {
1208         REQ(n, comp_iter);
1209         if (TYPE(CHILD(n, 0)) == comp_for)
1210             return n_ifs;
1211         n = CHILD(n, 0);
1212         REQ(n, comp_if);
1213         n_ifs++;
1214         if (NCH(n) == 2)
1215             return n_ifs;
1216         n = CHILD(n, 2);
1217     }
1218 }
1219 
1220 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1221 ast_for_comprehension(struct compiling *c, const node *n)
1222 {
1223     int i, n_fors;
1224     asdl_seq *comps;
1225 
1226     n_fors = count_comp_fors(c, n);
1227     if (n_fors == -1)
1228         return NULL;
1229 
1230     comps = asdl_seq_new(n_fors, c->c_arena);
1231     if (!comps)
1232         return NULL;
1233 
1234     for (i = 0; i < n_fors; i++) {
1235         comprehension_ty comp;
1236         asdl_seq *t;
1237         expr_ty expression, first;
1238         node *for_ch;
1239 
1240         REQ(n, comp_for);
1241 
1242         for_ch = CHILD(n, 1);
1243         t = ast_for_exprlist(c, for_ch, Store);
1244         if (!t)
1245             return NULL;
1246         expression = ast_for_expr(c, CHILD(n, 3));
1247         if (!expression)
1248             return NULL;
1249 
1250         /* Check the # of children rather than the length of t, since
1251            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1252         first = (expr_ty)asdl_seq_GET(t, 0);
1253         if (NCH(for_ch) == 1)
1254             comp = comprehension(first, expression, NULL, c->c_arena);
1255         else
1256             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1257                                      c->c_arena),
1258                                expression, NULL, c->c_arena);
1259         if (!comp)
1260             return NULL;
1261 
1262         if (NCH(n) == 5) {
1263             int j, n_ifs;
1264             asdl_seq *ifs;
1265 
1266             n = CHILD(n, 4);
1267             n_ifs = count_comp_ifs(c, n);
1268             if (n_ifs == -1)
1269                 return NULL;
1270 
1271             ifs = asdl_seq_new(n_ifs, c->c_arena);
1272             if (!ifs)
1273                 return NULL;
1274 
1275             for (j = 0; j < n_ifs; j++) {
1276                 REQ(n, comp_iter);
1277                 n = CHILD(n, 0);
1278                 REQ(n, comp_if);
1279 
1280                 expression = ast_for_expr(c, CHILD(n, 1));
1281                 if (!expression)
1282                     return NULL;
1283                 asdl_seq_SET(ifs, j, expression);
1284                 if (NCH(n) == 3)
1285                     n = CHILD(n, 2);
1286             }
1287             /* on exit, must guarantee that n is a comp_for */
1288             if (TYPE(n) == comp_iter)
1289                 n = CHILD(n, 0);
1290             comp->ifs = ifs;
1291         }
1292         asdl_seq_SET(comps, i, comp);
1293     }
1294     return comps;
1295 }
1296 
1297 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1298 ast_for_itercomp(struct compiling *c, const node *n, int type)
1299 {
1300     expr_ty elt;
1301     asdl_seq *comps;
1302 
1303     assert(NCH(n) > 1);
1304 
1305     elt = ast_for_expr(c, CHILD(n, 0));
1306     if (!elt)
1307         return NULL;
1308 
1309     comps = ast_for_comprehension(c, CHILD(n, 1));
1310     if (!comps)
1311         return NULL;
1312 
1313     if (type == COMP_GENEXP)
1314         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1315     else if (type == COMP_SETCOMP)
1316         return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1317     else
1318         /* Should never happen */
1319         return NULL;
1320 }
1321 
1322 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1323 ast_for_dictcomp(struct compiling *c, const node *n)
1324 {
1325     expr_ty key, value;
1326     asdl_seq *comps;
1327 
1328     assert(NCH(n) > 3);
1329     REQ(CHILD(n, 1), COLON);
1330 
1331     key = ast_for_expr(c, CHILD(n, 0));
1332     if (!key)
1333         return NULL;
1334 
1335     value = ast_for_expr(c, CHILD(n, 2));
1336     if (!value)
1337         return NULL;
1338 
1339     comps = ast_for_comprehension(c, CHILD(n, 3));
1340     if (!comps)
1341         return NULL;
1342 
1343     return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1344 }
1345 
1346 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1347 ast_for_genexp(struct compiling *c, const node *n)
1348 {
1349     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1350     return ast_for_itercomp(c, n, COMP_GENEXP);
1351 }
1352 
1353 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1354 ast_for_setcomp(struct compiling *c, const node *n)
1355 {
1356     assert(TYPE(n) == (dictorsetmaker));
1357     return ast_for_itercomp(c, n, COMP_SETCOMP);
1358 }
1359 
1360 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1361 ast_for_atom(struct compiling *c, const node *n)
1362 {
1363     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1364        | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1365     */
1366     node *ch = CHILD(n, 0);
1367 
1368     switch (TYPE(ch)) {
1369     case NAME: {
1370         /* All names start in Load context, but may later be
1371            changed. */
1372         PyObject *name = NEW_IDENTIFIER(ch);
1373         if (!name)
1374             return NULL;
1375         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1376     }
1377     case STRING: {
1378         PyObject *str = parsestrplus(c, n);
1379         if (!str) {
1380 #ifdef Py_USING_UNICODE
1381             if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1382                 PyObject *type, *value, *tback, *errstr;
1383                 PyErr_Fetch(&type, &value, &tback);
1384                 errstr = PyObject_Str(value);
1385                 if (errstr) {
1386                     char *s = "";
1387                     char buf[128];
1388                     s = PyString_AsString(errstr);
1389                     PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1390                     ast_error(n, buf);
1391                     Py_DECREF(errstr);
1392                 } else {
1393                     ast_error(n, "(unicode error) unknown error");
1394                 }
1395                 Py_DECREF(type);
1396                 Py_DECREF(value);
1397                 Py_XDECREF(tback);
1398             }
1399 #endif
1400             return NULL;
1401         }
1402         PyArena_AddPyObject(c->c_arena, str);
1403         return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1404     }
1405     case NUMBER: {
1406         PyObject *pynum = parsenumber(c, STR(ch));
1407         if (!pynum)
1408             return NULL;
1409 
1410         PyArena_AddPyObject(c->c_arena, pynum);
1411         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1412     }
1413     case LPAR: /* some parenthesized expressions */
1414         ch = CHILD(n, 1);
1415 
1416         if (TYPE(ch) == RPAR)
1417             return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1418 
1419         if (TYPE(ch) == yield_expr)
1420             return ast_for_expr(c, ch);
1421 
1422         return ast_for_testlist_comp(c, ch);
1423     case LSQB: /* list (or list comprehension) */
1424         ch = CHILD(n, 1);
1425 
1426         if (TYPE(ch) == RSQB)
1427             return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1428 
1429         REQ(ch, listmaker);
1430         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1431             asdl_seq *elts = seq_for_testlist(c, ch);
1432             if (!elts)
1433                 return NULL;
1434 
1435             return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1436         }
1437         else
1438             return ast_for_listcomp(c, ch);
1439     case LBRACE: {
1440         /* dictorsetmaker:
1441          *    (test ':' test (comp_for | (',' test ':' test)* [','])) |
1442          *    (test (comp_for | (',' test)* [',']))
1443          */
1444         int i, size;
1445         asdl_seq *keys, *values;
1446 
1447         ch = CHILD(n, 1);
1448         if (TYPE(ch) == RBRACE) {
1449             /* it's an empty dict */
1450             return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1451         } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1452             /* it's a simple set */
1453             asdl_seq *elts;
1454             size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1455             elts = asdl_seq_new(size, c->c_arena);
1456             if (!elts)
1457                 return NULL;
1458             for (i = 0; i < NCH(ch); i += 2) {
1459                 expr_ty expression;
1460                 expression = ast_for_expr(c, CHILD(ch, i));
1461                 if (!expression)
1462                     return NULL;
1463                 asdl_seq_SET(elts, i / 2, expression);
1464             }
1465             return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1466         } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1467             /* it's a set comprehension */
1468             return ast_for_setcomp(c, ch);
1469         } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1470             return ast_for_dictcomp(c, ch);
1471         } else {
1472             /* it's a dict */
1473             size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1474             keys = asdl_seq_new(size, c->c_arena);
1475             if (!keys)
1476                 return NULL;
1477 
1478             values = asdl_seq_new(size, c->c_arena);
1479             if (!values)
1480                 return NULL;
1481 
1482             for (i = 0; i < NCH(ch); i += 4) {
1483                 expr_ty expression;
1484 
1485                 expression = ast_for_expr(c, CHILD(ch, i));
1486                 if (!expression)
1487                     return NULL;
1488 
1489                 asdl_seq_SET(keys, i / 4, expression);
1490 
1491                 expression = ast_for_expr(c, CHILD(ch, i + 2));
1492                 if (!expression)
1493                     return NULL;
1494 
1495                 asdl_seq_SET(values, i / 4, expression);
1496             }
1497             return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1498         }
1499     }
1500     case BACKQUOTE: { /* repr */
1501         expr_ty expression;
1502         if (Py_Py3kWarningFlag &&
1503             !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1504             return NULL;
1505         expression = ast_for_testlist(c, CHILD(n, 1));
1506         if (!expression)
1507             return NULL;
1508 
1509         return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1510     }
1511     default:
1512         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1513         return NULL;
1514     }
1515 }
1516 
1517 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1518 ast_for_slice(struct compiling *c, const node *n)
1519 {
1520     node *ch;
1521     expr_ty lower = NULL, upper = NULL, step = NULL;
1522 
1523     REQ(n, subscript);
1524 
1525     /*
1526        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1527        sliceop: ':' [test]
1528     */
1529     ch = CHILD(n, 0);
1530     if (TYPE(ch) == DOT)
1531         return Ellipsis(c->c_arena);
1532 
1533     if (NCH(n) == 1 && TYPE(ch) == test) {
1534         /* 'step' variable hold no significance in terms of being used over
1535            other vars */
1536         step = ast_for_expr(c, ch);
1537         if (!step)
1538             return NULL;
1539 
1540         return Index(step, c->c_arena);
1541     }
1542 
1543     if (TYPE(ch) == test) {
1544         lower = ast_for_expr(c, ch);
1545         if (!lower)
1546             return NULL;
1547     }
1548 
1549     /* If there's an upper bound it's in the second or third position. */
1550     if (TYPE(ch) == COLON) {
1551         if (NCH(n) > 1) {
1552             node *n2 = CHILD(n, 1);
1553 
1554             if (TYPE(n2) == test) {
1555                 upper = ast_for_expr(c, n2);
1556                 if (!upper)
1557                     return NULL;
1558             }
1559         }
1560     } else if (NCH(n) > 2) {
1561         node *n2 = CHILD(n, 2);
1562 
1563         if (TYPE(n2) == test) {
1564             upper = ast_for_expr(c, n2);
1565             if (!upper)
1566                 return NULL;
1567         }
1568     }
1569 
1570     ch = CHILD(n, NCH(n) - 1);
1571     if (TYPE(ch) == sliceop) {
1572         if (NCH(ch) == 1) {
1573             /*
1574               This is an extended slice (ie "x[::]") with no expression in the
1575               step field. We set this literally to "None" in order to
1576               disambiguate it from x[:]. (The interpreter might have to call
1577               __getslice__ for x[:], but it must call __getitem__ for x[::].)
1578             */
1579             identifier none = new_identifier("None", c->c_arena);
1580             if (!none)
1581                 return NULL;
1582             ch = CHILD(ch, 0);
1583             step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1584             if (!step)
1585                 return NULL;
1586         } else {
1587             ch = CHILD(ch, 1);
1588             if (TYPE(ch) == test) {
1589                 step = ast_for_expr(c, ch);
1590                 if (!step)
1591                     return NULL;
1592             }
1593         }
1594     }
1595 
1596     return Slice(lower, upper, step, c->c_arena);
1597 }
1598 
1599 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1600 ast_for_binop(struct compiling *c, const node *n)
1601 {
1602         /* Must account for a sequence of expressions.
1603            How should A op B op C by represented?
1604            BinOp(BinOp(A, op, B), op, C).
1605         */
1606 
1607         int i, nops;
1608         expr_ty expr1, expr2, result;
1609         operator_ty newoperator;
1610 
1611         expr1 = ast_for_expr(c, CHILD(n, 0));
1612         if (!expr1)
1613             return NULL;
1614 
1615         expr2 = ast_for_expr(c, CHILD(n, 2));
1616         if (!expr2)
1617             return NULL;
1618 
1619         newoperator = get_operator(CHILD(n, 1));
1620         if (!newoperator)
1621             return NULL;
1622 
1623         result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1624                        c->c_arena);
1625         if (!result)
1626             return NULL;
1627 
1628         nops = (NCH(n) - 1) / 2;
1629         for (i = 1; i < nops; i++) {
1630                 expr_ty tmp_result, tmp;
1631                 const node* next_oper = CHILD(n, i * 2 + 1);
1632 
1633                 newoperator = get_operator(next_oper);
1634                 if (!newoperator)
1635                     return NULL;
1636 
1637                 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1638                 if (!tmp)
1639                     return NULL;
1640 
1641                 tmp_result = BinOp(result, newoperator, tmp,
1642                                    LINENO(next_oper), next_oper->n_col_offset,
1643                                    c->c_arena);
1644                 if (!tmp_result)
1645                         return NULL;
1646                 result = tmp_result;
1647         }
1648         return result;
1649 }
1650 
1651 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1652 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1653 {
1654     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1655        subscriptlist: subscript (',' subscript)* [',']
1656        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1657      */
1658     REQ(n, trailer);
1659     if (TYPE(CHILD(n, 0)) == LPAR) {
1660         if (NCH(n) == 2)
1661             return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1662                         n->n_col_offset, c->c_arena);
1663         else
1664             return ast_for_call(c, CHILD(n, 1), left_expr);
1665     }
1666     else if (TYPE(CHILD(n, 0)) == DOT ) {
1667         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1668         if (!attr_id)
1669             return NULL;
1670         return Attribute(left_expr, attr_id, Load,
1671                          LINENO(n), n->n_col_offset, c->c_arena);
1672     }
1673     else {
1674         REQ(CHILD(n, 0), LSQB);
1675         REQ(CHILD(n, 2), RSQB);
1676         n = CHILD(n, 1);
1677         if (NCH(n) == 1) {
1678             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1679             if (!slc)
1680                 return NULL;
1681             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1682                              c->c_arena);
1683         }
1684         else {
1685             /* The grammar is ambiguous here. The ambiguity is resolved
1686                by treating the sequence as a tuple literal if there are
1687                no slice features.
1688             */
1689             int j;
1690             slice_ty slc;
1691             expr_ty e;
1692             bool simple = true;
1693             asdl_seq *slices, *elts;
1694             slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1695             if (!slices)
1696                 return NULL;
1697             for (j = 0; j < NCH(n); j += 2) {
1698                 slc = ast_for_slice(c, CHILD(n, j));
1699                 if (!slc)
1700                     return NULL;
1701                 if (slc->kind != Index_kind)
1702                     simple = false;
1703                 asdl_seq_SET(slices, j / 2, slc);
1704             }
1705             if (!simple) {
1706                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1707                                  Load, LINENO(n), n->n_col_offset, c->c_arena);
1708             }
1709             /* extract Index values and put them in a Tuple */
1710             elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1711             if (!elts)
1712                 return NULL;
1713             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1714                 slc = (slice_ty)asdl_seq_GET(slices, j);
1715                 assert(slc->kind == Index_kind  && slc->v.Index.value);
1716                 asdl_seq_SET(elts, j, slc->v.Index.value);
1717             }
1718             e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1719             if (!e)
1720                 return NULL;
1721             return Subscript(left_expr, Index(e, c->c_arena),
1722                              Load, LINENO(n), n->n_col_offset, c->c_arena);
1723         }
1724     }
1725 }
1726 
1727 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1728 ast_for_factor(struct compiling *c, const node *n)
1729 {
1730     node *pfactor, *ppower, *patom, *pnum;
1731     expr_ty expression;
1732 
1733     /* If the unary - operator is applied to a constant, don't generate
1734        a UNARY_NEGATIVE opcode.  Just store the approriate value as a
1735        constant.  The peephole optimizer already does something like
1736        this but it doesn't handle the case where the constant is
1737        (sys.maxint - 1).  In that case, we want a PyIntObject, not a
1738        PyLongObject.
1739     */
1740     if (TYPE(CHILD(n, 0)) == MINUS &&
1741         NCH(n) == 2 &&
1742         TYPE((pfactor = CHILD(n, 1))) == factor &&
1743         NCH(pfactor) == 1 &&
1744         TYPE((ppower = CHILD(pfactor, 0))) == power &&
1745         NCH(ppower) == 1 &&
1746         TYPE((patom = CHILD(ppower, 0))) == atom &&
1747         TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1748         PyObject *pynum;
1749         char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1750         if (s == NULL)
1751             return NULL;
1752         s[0] = '-';
1753         strcpy(s + 1, STR(pnum));
1754         pynum = parsenumber(c, s);
1755         PyObject_FREE(s);
1756         if (!pynum)
1757             return NULL;
1758 
1759         PyArena_AddPyObject(c->c_arena, pynum);
1760         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1761     }
1762 
1763     expression = ast_for_expr(c, CHILD(n, 1));
1764     if (!expression)
1765         return NULL;
1766 
1767     switch (TYPE(CHILD(n, 0))) {
1768         case PLUS:
1769             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1770                            c->c_arena);
1771         case MINUS:
1772             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1773                            c->c_arena);
1774         case TILDE:
1775             return UnaryOp(Invert, expression, LINENO(n),
1776                            n->n_col_offset, c->c_arena);
1777     }
1778     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1779                  TYPE(CHILD(n, 0)));
1780     return NULL;
1781 }
1782 
1783 static expr_ty
ast_for_power(struct compiling * c,const node * n)1784 ast_for_power(struct compiling *c, const node *n)
1785 {
1786     /* power: atom trailer* ('**' factor)*
1787      */
1788     int i;
1789     expr_ty e, tmp;
1790     REQ(n, power);
1791     e = ast_for_atom(c, CHILD(n, 0));
1792     if (!e)
1793         return NULL;
1794     if (NCH(n) == 1)
1795         return e;
1796     for (i = 1; i < NCH(n); i++) {
1797         node *ch = CHILD(n, i);
1798         if (TYPE(ch) != trailer)
1799             break;
1800         tmp = ast_for_trailer(c, ch, e);
1801         if (!tmp)
1802             return NULL;
1803         tmp->lineno = e->lineno;
1804         tmp->col_offset = e->col_offset;
1805         e = tmp;
1806     }
1807     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1808         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1809         if (!f)
1810             return NULL;
1811         tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1812         if (!tmp)
1813             return NULL;
1814         e = tmp;
1815     }
1816     return e;
1817 }
1818 
1819 /* Do not name a variable 'expr'!  Will cause a compile error.
1820 */
1821 
1822 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1823 ast_for_expr(struct compiling *c, const node *n)
1824 {
1825     /* handle the full range of simple expressions
1826        test: or_test ['if' or_test 'else' test] | lambdef
1827        or_test: and_test ('or' and_test)*
1828        and_test: not_test ('and' not_test)*
1829        not_test: 'not' not_test | comparison
1830        comparison: expr (comp_op expr)*
1831        expr: xor_expr ('|' xor_expr)*
1832        xor_expr: and_expr ('^' and_expr)*
1833        and_expr: shift_expr ('&' shift_expr)*
1834        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1835        arith_expr: term (('+'|'-') term)*
1836        term: factor (('*'|'/'|'%'|'//') factor)*
1837        factor: ('+'|'-'|'~') factor | power
1838        power: atom trailer* ('**' factor)*
1839 
1840        As well as modified versions that exist for backward compatibility,
1841        to explicitly allow:
1842        [ x for x in lambda: 0, lambda: 1 ]
1843        (which would be ambiguous without these extra rules)
1844 
1845        old_test: or_test | old_lambdef
1846        old_lambdef: 'lambda' [vararglist] ':' old_test
1847 
1848     */
1849 
1850     asdl_seq *seq;
1851     int i;
1852 
1853  loop:
1854     switch (TYPE(n)) {
1855         case test:
1856         case old_test:
1857             if (TYPE(CHILD(n, 0)) == lambdef ||
1858                 TYPE(CHILD(n, 0)) == old_lambdef)
1859                 return ast_for_lambdef(c, CHILD(n, 0));
1860             else if (NCH(n) > 1)
1861                 return ast_for_ifexpr(c, n);
1862             /* Fallthrough */
1863         case or_test:
1864         case and_test:
1865             if (NCH(n) == 1) {
1866                 n = CHILD(n, 0);
1867                 goto loop;
1868             }
1869             seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1870             if (!seq)
1871                 return NULL;
1872             for (i = 0; i < NCH(n); i += 2) {
1873                 expr_ty e = ast_for_expr(c, CHILD(n, i));
1874                 if (!e)
1875                     return NULL;
1876                 asdl_seq_SET(seq, i / 2, e);
1877             }
1878             if (!strcmp(STR(CHILD(n, 1)), "and"))
1879                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1880                               c->c_arena);
1881             assert(!strcmp(STR(CHILD(n, 1)), "or"));
1882             return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1883         case not_test:
1884             if (NCH(n) == 1) {
1885                 n = CHILD(n, 0);
1886                 goto loop;
1887             }
1888             else {
1889                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1890                 if (!expression)
1891                     return NULL;
1892 
1893                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1894                                c->c_arena);
1895             }
1896         case comparison:
1897             if (NCH(n) == 1) {
1898                 n = CHILD(n, 0);
1899                 goto loop;
1900             }
1901             else {
1902                 expr_ty expression;
1903                 asdl_int_seq *ops;
1904                 asdl_seq *cmps;
1905                 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1906                 if (!ops)
1907                     return NULL;
1908                 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1909                 if (!cmps) {
1910                     return NULL;
1911                 }
1912                 for (i = 1; i < NCH(n); i += 2) {
1913                     cmpop_ty newoperator;
1914 
1915                     newoperator = ast_for_comp_op(c, CHILD(n, i));
1916                     if (!newoperator) {
1917                         return NULL;
1918                     }
1919 
1920                     expression = ast_for_expr(c, CHILD(n, i + 1));
1921                     if (!expression) {
1922                         return NULL;
1923                     }
1924 
1925                     asdl_seq_SET(ops, i / 2, newoperator);
1926                     asdl_seq_SET(cmps, i / 2, expression);
1927                 }
1928                 expression = ast_for_expr(c, CHILD(n, 0));
1929                 if (!expression) {
1930                     return NULL;
1931                 }
1932 
1933                 return Compare(expression, ops, cmps, LINENO(n),
1934                                n->n_col_offset, c->c_arena);
1935             }
1936             break;
1937 
1938         /* The next five cases all handle BinOps.  The main body of code
1939            is the same in each case, but the switch turned inside out to
1940            reuse the code for each type of operator.
1941          */
1942         case expr:
1943         case xor_expr:
1944         case and_expr:
1945         case shift_expr:
1946         case arith_expr:
1947         case term:
1948             if (NCH(n) == 1) {
1949                 n = CHILD(n, 0);
1950                 goto loop;
1951             }
1952             return ast_for_binop(c, n);
1953         case yield_expr: {
1954             expr_ty exp = NULL;
1955             if (NCH(n) == 2) {
1956                 exp = ast_for_testlist(c, CHILD(n, 1));
1957                 if (!exp)
1958                     return NULL;
1959             }
1960             return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1961         }
1962         case factor:
1963             if (NCH(n) == 1) {
1964                 n = CHILD(n, 0);
1965                 goto loop;
1966             }
1967             return ast_for_factor(c, n);
1968         case power:
1969             return ast_for_power(c, n);
1970         default:
1971             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1972             return NULL;
1973     }
1974     /* should never get here unless if error is set */
1975     return NULL;
1976 }
1977 
1978 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)1979 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1980 {
1981     /*
1982       arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1983                | '**' test)
1984       argument: [test '='] test [comp_for]        # Really [keyword '='] test
1985     */
1986 
1987     int i, nargs, nkeywords, ngens;
1988     asdl_seq *args;
1989     asdl_seq *keywords;
1990     expr_ty vararg = NULL, kwarg = NULL;
1991 
1992     REQ(n, arglist);
1993 
1994     nargs = 0;
1995     nkeywords = 0;
1996     ngens = 0;
1997     for (i = 0; i < NCH(n); i++) {
1998         node *ch = CHILD(n, i);
1999         if (TYPE(ch) == argument) {
2000             if (NCH(ch) == 1)
2001                 nargs++;
2002             else if (TYPE(CHILD(ch, 1)) == comp_for)
2003                 ngens++;
2004             else
2005                 nkeywords++;
2006         }
2007     }
2008     if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2009         ast_error(n, "Generator expression must be parenthesized "
2010                   "if not sole argument");
2011         return NULL;
2012     }
2013 
2014     if (nargs + nkeywords + ngens > 255) {
2015       ast_error(n, "more than 255 arguments");
2016       return NULL;
2017     }
2018 
2019     args = asdl_seq_new(nargs + ngens, c->c_arena);
2020     if (!args)
2021         return NULL;
2022     keywords = asdl_seq_new(nkeywords, c->c_arena);
2023     if (!keywords)
2024         return NULL;
2025     nargs = 0;
2026     nkeywords = 0;
2027     for (i = 0; i < NCH(n); i++) {
2028         node *ch = CHILD(n, i);
2029         if (TYPE(ch) == argument) {
2030             expr_ty e;
2031             if (NCH(ch) == 1) {
2032                 if (nkeywords) {
2033                     ast_error(CHILD(ch, 0),
2034                               "non-keyword arg after keyword arg");
2035                     return NULL;
2036                 }
2037                 if (vararg) {
2038                     ast_error(CHILD(ch, 0),
2039                               "only named arguments may follow *expression");
2040                     return NULL;
2041                 }
2042                 e = ast_for_expr(c, CHILD(ch, 0));
2043                 if (!e)
2044                     return NULL;
2045                 asdl_seq_SET(args, nargs++, e);
2046             }
2047             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2048                 e = ast_for_genexp(c, ch);
2049                 if (!e)
2050                     return NULL;
2051                 asdl_seq_SET(args, nargs++, e);
2052             }
2053             else {
2054                 keyword_ty kw;
2055                 identifier key;
2056                 int k;
2057                 char *tmp;
2058 
2059                 /* CHILD(ch, 0) is test, but must be an identifier? */
2060                 e = ast_for_expr(c, CHILD(ch, 0));
2061                 if (!e)
2062                     return NULL;
2063                 /* f(lambda x: x[0] = 3) ends up getting parsed with
2064                  * LHS test = lambda x: x[0], and RHS test = 3.
2065                  * SF bug 132313 points out that complaining about a keyword
2066                  * then is very confusing.
2067                  */
2068                 if (e->kind == Lambda_kind) {
2069                     ast_error(CHILD(ch, 0),
2070                               "lambda cannot contain assignment");
2071                     return NULL;
2072                 } else if (e->kind != Name_kind) {
2073                     ast_error(CHILD(ch, 0), "keyword can't be an expression");
2074                     return NULL;
2075                 }
2076                 key = e->v.Name.id;
2077                 if (!forbidden_check(c, CHILD(ch, 0), PyBytes_AS_STRING(key)))
2078                     return NULL;
2079                 for (k = 0; k < nkeywords; k++) {
2080                     tmp = PyString_AS_STRING(
2081                         ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2082                     if (!strcmp(tmp, PyString_AS_STRING(key))) {
2083                         ast_error(CHILD(ch, 0), "keyword argument repeated");
2084                         return NULL;
2085                     }
2086                 }
2087                 e = ast_for_expr(c, CHILD(ch, 2));
2088                 if (!e)
2089                     return NULL;
2090                 kw = keyword(key, e, c->c_arena);
2091                 if (!kw)
2092                     return NULL;
2093                 asdl_seq_SET(keywords, nkeywords++, kw);
2094             }
2095         }
2096         else if (TYPE(ch) == STAR) {
2097             vararg = ast_for_expr(c, CHILD(n, i+1));
2098             if (!vararg)
2099                 return NULL;
2100             i++;
2101         }
2102         else if (TYPE(ch) == DOUBLESTAR) {
2103             kwarg = ast_for_expr(c, CHILD(n, i+1));
2104             if (!kwarg)
2105                 return NULL;
2106             i++;
2107         }
2108     }
2109 
2110     return Call(func, args, keywords, vararg, kwarg, func->lineno,
2111                 func->col_offset, c->c_arena);
2112 }
2113 
2114 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2115 ast_for_testlist(struct compiling *c, const node* n)
2116 {
2117     /* testlist_comp: test (',' test)* [','] */
2118     /* testlist: test (',' test)* [','] */
2119     /* testlist_safe: test (',' test)+ [','] */
2120     /* testlist1: test (',' test)* */
2121     assert(NCH(n) > 0);
2122     if (TYPE(n) == testlist_comp) {
2123         if (NCH(n) > 1)
2124             assert(TYPE(CHILD(n, 1)) != comp_for);
2125     }
2126     else {
2127         assert(TYPE(n) == testlist ||
2128                TYPE(n) == testlist_safe ||
2129                TYPE(n) == testlist1);
2130     }
2131     if (NCH(n) == 1)
2132         return ast_for_expr(c, CHILD(n, 0));
2133     else {
2134         asdl_seq *tmp = seq_for_testlist(c, n);
2135         if (!tmp)
2136             return NULL;
2137         return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2138     }
2139 }
2140 
2141 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2142 ast_for_testlist_comp(struct compiling *c, const node* n)
2143 {
2144     /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2145     /* argument: test [ comp_for ] */
2146     assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2147     if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2148         return ast_for_genexp(c, n);
2149     return ast_for_testlist(c, n);
2150 }
2151 
2152 /* like ast_for_testlist() but returns a sequence */
2153 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2154 ast_for_class_bases(struct compiling *c, const node* n)
2155 {
2156     /* testlist: test (',' test)* [','] */
2157     assert(NCH(n) > 0);
2158     REQ(n, testlist);
2159     if (NCH(n) == 1) {
2160         expr_ty base;
2161         asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2162         if (!bases)
2163             return NULL;
2164         base = ast_for_expr(c, CHILD(n, 0));
2165         if (!base)
2166             return NULL;
2167         asdl_seq_SET(bases, 0, base);
2168         return bases;
2169     }
2170 
2171     return seq_for_testlist(c, n);
2172 }
2173 
2174 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2175 ast_for_expr_stmt(struct compiling *c, const node *n)
2176 {
2177     REQ(n, expr_stmt);
2178     /* expr_stmt: testlist (augassign (yield_expr|testlist)
2179                 | ('=' (yield_expr|testlist))*)
2180        testlist: test (',' test)* [',']
2181        augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2182                 | '<<=' | '>>=' | '**=' | '//='
2183        test: ... here starts the operator precedence dance
2184      */
2185 
2186     if (NCH(n) == 1) {
2187         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2188         if (!e)
2189             return NULL;
2190 
2191         return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2192     }
2193     else if (TYPE(CHILD(n, 1)) == augassign) {
2194         expr_ty expr1, expr2;
2195         operator_ty newoperator;
2196         node *ch = CHILD(n, 0);
2197 
2198         expr1 = ast_for_testlist(c, ch);
2199         if (!expr1)
2200             return NULL;
2201         if(!set_context(c, expr1, Store, ch))
2202             return NULL;
2203         /* set_context checks that most expressions are not the left side.
2204           Augmented assignments can only have a name, a subscript, or an
2205           attribute on the left, though, so we have to explicitly check for
2206           those. */
2207         switch (expr1->kind) {
2208             case Name_kind:
2209             case Attribute_kind:
2210             case Subscript_kind:
2211                 break;
2212             default:
2213                 ast_error(ch, "illegal expression for augmented assignment");
2214                 return NULL;
2215         }
2216 
2217         ch = CHILD(n, 2);
2218         if (TYPE(ch) == testlist)
2219             expr2 = ast_for_testlist(c, ch);
2220         else
2221             expr2 = ast_for_expr(c, ch);
2222         if (!expr2)
2223             return NULL;
2224 
2225         newoperator = ast_for_augassign(c, CHILD(n, 1));
2226         if (!newoperator)
2227             return NULL;
2228 
2229         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2230                          c->c_arena);
2231     }
2232     else {
2233         int i;
2234         asdl_seq *targets;
2235         node *value;
2236         expr_ty expression;
2237 
2238         /* a normal assignment */
2239         REQ(CHILD(n, 1), EQUAL);
2240         targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2241         if (!targets)
2242             return NULL;
2243         for (i = 0; i < NCH(n) - 2; i += 2) {
2244             expr_ty e;
2245             node *ch = CHILD(n, i);
2246             if (TYPE(ch) == yield_expr) {
2247                 ast_error(ch, "assignment to yield expression not possible");
2248                 return NULL;
2249             }
2250             e = ast_for_testlist(c, ch);
2251             if (!e)
2252                 return NULL;
2253 
2254             /* set context to assign */
2255             if (!set_context(c, e, Store, CHILD(n, i)))
2256                 return NULL;
2257 
2258             asdl_seq_SET(targets, i / 2, e);
2259         }
2260         value = CHILD(n, NCH(n) - 1);
2261         if (TYPE(value) == testlist)
2262             expression = ast_for_testlist(c, value);
2263         else
2264             expression = ast_for_expr(c, value);
2265         if (!expression)
2266             return NULL;
2267         return Assign(targets, expression, LINENO(n), n->n_col_offset,
2268                       c->c_arena);
2269     }
2270 }
2271 
2272 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2273 ast_for_print_stmt(struct compiling *c, const node *n)
2274 {
2275     /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2276                              | '>>' test [ (',' test)+ [','] ] )
2277      */
2278     expr_ty dest = NULL, expression;
2279     asdl_seq *seq = NULL;
2280     bool nl;
2281     int i, j, values_count, start = 1;
2282 
2283     REQ(n, print_stmt);
2284     if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2285         dest = ast_for_expr(c, CHILD(n, 2));
2286         if (!dest)
2287             return NULL;
2288         start = 4;
2289     }
2290     values_count = (NCH(n) + 1 - start) / 2;
2291     if (values_count) {
2292         seq = asdl_seq_new(values_count, c->c_arena);
2293         if (!seq)
2294             return NULL;
2295         for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2296             expression = ast_for_expr(c, CHILD(n, i));
2297             if (!expression)
2298                 return NULL;
2299             asdl_seq_SET(seq, j, expression);
2300         }
2301     }
2302     nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2303     return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2304 }
2305 
2306 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2307 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2308 {
2309     asdl_seq *seq;
2310     int i;
2311     expr_ty e;
2312 
2313     REQ(n, exprlist);
2314 
2315     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2316     if (!seq)
2317         return NULL;
2318     for (i = 0; i < NCH(n); i += 2) {
2319         e = ast_for_expr(c, CHILD(n, i));
2320         if (!e)
2321             return NULL;
2322         asdl_seq_SET(seq, i / 2, e);
2323         if (context && !set_context(c, e, context, CHILD(n, i)))
2324             return NULL;
2325     }
2326     return seq;
2327 }
2328 
2329 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2330 ast_for_del_stmt(struct compiling *c, const node *n)
2331 {
2332     asdl_seq *expr_list;
2333 
2334     /* del_stmt: 'del' exprlist */
2335     REQ(n, del_stmt);
2336 
2337     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2338     if (!expr_list)
2339         return NULL;
2340     return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2341 }
2342 
2343 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2344 ast_for_flow_stmt(struct compiling *c, const node *n)
2345 {
2346     /*
2347       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2348                  | yield_stmt
2349       break_stmt: 'break'
2350       continue_stmt: 'continue'
2351       return_stmt: 'return' [testlist]
2352       yield_stmt: yield_expr
2353       yield_expr: 'yield' testlist
2354       raise_stmt: 'raise' [test [',' test [',' test]]]
2355     */
2356     node *ch;
2357 
2358     REQ(n, flow_stmt);
2359     ch = CHILD(n, 0);
2360     switch (TYPE(ch)) {
2361         case break_stmt:
2362             return Break(LINENO(n), n->n_col_offset, c->c_arena);
2363         case continue_stmt:
2364             return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2365         case yield_stmt: { /* will reduce to yield_expr */
2366             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2367             if (!exp)
2368                 return NULL;
2369             return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2370         }
2371         case return_stmt:
2372             if (NCH(ch) == 1)
2373                 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2374             else {
2375                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2376                 if (!expression)
2377                     return NULL;
2378                 return Return(expression, LINENO(n), n->n_col_offset,
2379                               c->c_arena);
2380             }
2381         case raise_stmt:
2382             if (NCH(ch) == 1)
2383                 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2384                              c->c_arena);
2385             else if (NCH(ch) == 2) {
2386                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2387                 if (!expression)
2388                     return NULL;
2389                 return Raise(expression, NULL, NULL, LINENO(n),
2390                              n->n_col_offset, c->c_arena);
2391             }
2392             else if (NCH(ch) == 4) {
2393                 expr_ty expr1, expr2;
2394 
2395                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2396                 if (!expr1)
2397                     return NULL;
2398                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2399                 if (!expr2)
2400                     return NULL;
2401 
2402                 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2403                              c->c_arena);
2404             }
2405             else if (NCH(ch) == 6) {
2406                 expr_ty expr1, expr2, expr3;
2407 
2408                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2409                 if (!expr1)
2410                     return NULL;
2411                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2412                 if (!expr2)
2413                     return NULL;
2414                 expr3 = ast_for_expr(c, CHILD(ch, 5));
2415                 if (!expr3)
2416                     return NULL;
2417 
2418                 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2419                              c->c_arena);
2420             }
2421         default:
2422             PyErr_Format(PyExc_SystemError,
2423                          "unexpected flow_stmt: %d", TYPE(ch));
2424             return NULL;
2425     }
2426 
2427     PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2428     return NULL;
2429 }
2430 
2431 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2432 alias_for_import_name(struct compiling *c, const node *n, int store)
2433 {
2434     /*
2435       import_as_name: NAME ['as' NAME]
2436       dotted_as_name: dotted_name ['as' NAME]
2437       dotted_name: NAME ('.' NAME)*
2438     */
2439     PyObject *str, *name;
2440 
2441  loop:
2442     switch (TYPE(n)) {
2443          case import_as_name: {
2444             node *name_node = CHILD(n, 0);
2445             str = NULL;
2446             if (NCH(n) == 3) {
2447                 node *str_node = CHILD(n, 2);
2448                 if (store && !forbidden_check(c, str_node, STR(str_node)))
2449                     return NULL;
2450                 str = NEW_IDENTIFIER(str_node);
2451                 if (!str)
2452                     return NULL;
2453             }
2454             else {
2455                 if (!forbidden_check(c, name_node, STR(name_node)))
2456                     return NULL;
2457             }
2458             name = NEW_IDENTIFIER(name_node);
2459             if (!name)
2460                 return NULL;
2461             return alias(name, str, c->c_arena);
2462         }
2463         case dotted_as_name:
2464             if (NCH(n) == 1) {
2465                 n = CHILD(n, 0);
2466                 goto loop;
2467             }
2468             else {
2469                 node *asname_node = CHILD(n, 2);
2470                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2471                 if (!a)
2472                     return NULL;
2473                 assert(!a->asname);
2474                 if (!forbidden_check(c, asname_node, STR(asname_node)))
2475                     return NULL;
2476                 a->asname = NEW_IDENTIFIER(asname_node);
2477                 if (!a->asname)
2478                     return NULL;
2479                 return a;
2480             }
2481             break;
2482         case dotted_name:
2483             if (NCH(n) == 1) {
2484                 node *name_node = CHILD(n, 0);
2485                 if (store && !forbidden_check(c, name_node, STR(name_node)))
2486                     return NULL;
2487                 name = NEW_IDENTIFIER(name_node);
2488                 if (!name)
2489                     return NULL;
2490                 return alias(name, NULL, c->c_arena);
2491             }
2492             else {
2493                 /* Create a string of the form "a.b.c" */
2494                 int i;
2495                 size_t len;
2496                 char *s;
2497 
2498                 len = 0;
2499                 for (i = 0; i < NCH(n); i += 2)
2500                     /* length of string plus one for the dot */
2501                     len += strlen(STR(CHILD(n, i))) + 1;
2502                 len--; /* the last name doesn't have a dot */
2503                 str = PyString_FromStringAndSize(NULL, len);
2504                 if (!str)
2505                     return NULL;
2506                 s = PyString_AS_STRING(str);
2507                 if (!s)
2508                     return NULL;
2509                 for (i = 0; i < NCH(n); i += 2) {
2510                     char *sch = STR(CHILD(n, i));
2511                     strcpy(s, STR(CHILD(n, i)));
2512                     s += strlen(sch);
2513                     *s++ = '.';
2514                 }
2515                 --s;
2516                 *s = '\0';
2517                 PyString_InternInPlace(&str);
2518                 PyArena_AddPyObject(c->c_arena, str);
2519                 return alias(str, NULL, c->c_arena);
2520             }
2521             break;
2522         case STAR:
2523             str = PyString_InternFromString("*");
2524             if (!str)
2525                 return NULL;
2526             PyArena_AddPyObject(c->c_arena, str);
2527             return alias(str, NULL, c->c_arena);
2528         default:
2529             PyErr_Format(PyExc_SystemError,
2530                          "unexpected import name: %d", TYPE(n));
2531             return NULL;
2532     }
2533 
2534     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2535     return NULL;
2536 }
2537 
2538 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2539 ast_for_import_stmt(struct compiling *c, const node *n)
2540 {
2541     /*
2542       import_stmt: import_name | import_from
2543       import_name: 'import' dotted_as_names
2544       import_from: 'from' ('.'* dotted_name | '.') 'import'
2545                           ('*' | '(' import_as_names ')' | import_as_names)
2546     */
2547     int lineno;
2548     int col_offset;
2549     int i;
2550     asdl_seq *aliases;
2551 
2552     REQ(n, import_stmt);
2553     lineno = LINENO(n);
2554     col_offset = n->n_col_offset;
2555     n = CHILD(n, 0);
2556     if (TYPE(n) == import_name) {
2557         n = CHILD(n, 1);
2558         REQ(n, dotted_as_names);
2559         aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2560         if (!aliases)
2561             return NULL;
2562         for (i = 0; i < NCH(n); i += 2) {
2563             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2564             if (!import_alias)
2565                 return NULL;
2566             asdl_seq_SET(aliases, i / 2, import_alias);
2567         }
2568         return Import(aliases, lineno, col_offset, c->c_arena);
2569     }
2570     else if (TYPE(n) == import_from) {
2571         int n_children;
2572         int idx, ndots = 0;
2573         alias_ty mod = NULL;
2574         identifier modname = NULL;
2575 
2576        /* Count the number of dots (for relative imports) and check for the
2577           optional module name */
2578         for (idx = 1; idx < NCH(n); idx++) {
2579             if (TYPE(CHILD(n, idx)) == dotted_name) {
2580                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2581                 if (!mod)
2582                     return NULL;
2583                 idx++;
2584                 break;
2585             } else if (TYPE(CHILD(n, idx)) != DOT) {
2586                 break;
2587             }
2588             ndots++;
2589         }
2590         idx++; /* skip over the 'import' keyword */
2591         switch (TYPE(CHILD(n, idx))) {
2592         case STAR:
2593             /* from ... import * */
2594             n = CHILD(n, idx);
2595             n_children = 1;
2596             break;
2597         case LPAR:
2598             /* from ... import (x, y, z) */
2599             n = CHILD(n, idx + 1);
2600             n_children = NCH(n);
2601             break;
2602         case import_as_names:
2603             /* from ... import x, y, z */
2604             n = CHILD(n, idx);
2605             n_children = NCH(n);
2606             if (n_children % 2 == 0) {
2607                 ast_error(n, "trailing comma not allowed without"
2608                              " surrounding parentheses");
2609                 return NULL;
2610             }
2611             break;
2612         default:
2613             ast_error(n, "Unexpected node-type in from-import");
2614             return NULL;
2615         }
2616 
2617         aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2618         if (!aliases)
2619             return NULL;
2620 
2621         /* handle "from ... import *" special b/c there's no children */
2622         if (TYPE(n) == STAR) {
2623             alias_ty import_alias = alias_for_import_name(c, n, 1);
2624             if (!import_alias)
2625                 return NULL;
2626                 asdl_seq_SET(aliases, 0, import_alias);
2627         }
2628         else {
2629             for (i = 0; i < NCH(n); i += 2) {
2630                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2631                 if (!import_alias)
2632                     return NULL;
2633                     asdl_seq_SET(aliases, i / 2, import_alias);
2634             }
2635         }
2636         if (mod != NULL)
2637             modname = mod->name;
2638         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2639                           c->c_arena);
2640     }
2641     PyErr_Format(PyExc_SystemError,
2642                  "unknown import statement: starts with command '%s'",
2643                  STR(CHILD(n, 0)));
2644     return NULL;
2645 }
2646 
2647 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2648 ast_for_global_stmt(struct compiling *c, const node *n)
2649 {
2650     /* global_stmt: 'global' NAME (',' NAME)* */
2651     identifier name;
2652     asdl_seq *s;
2653     int i;
2654 
2655     REQ(n, global_stmt);
2656     s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2657     if (!s)
2658         return NULL;
2659     for (i = 1; i < NCH(n); i += 2) {
2660         name = NEW_IDENTIFIER(CHILD(n, i));
2661         if (!name)
2662             return NULL;
2663         asdl_seq_SET(s, i / 2, name);
2664     }
2665     return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2666 }
2667 
2668 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2669 ast_for_exec_stmt(struct compiling *c, const node *n)
2670 {
2671     expr_ty expr1, globals = NULL, locals = NULL;
2672     int n_children = NCH(n);
2673     if (n_children != 2 && n_children != 4 && n_children != 6) {
2674         PyErr_Format(PyExc_SystemError,
2675                      "poorly formed 'exec' statement: %d parts to statement",
2676                      n_children);
2677         return NULL;
2678     }
2679 
2680     /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2681     REQ(n, exec_stmt);
2682     expr1 = ast_for_expr(c, CHILD(n, 1));
2683     if (!expr1)
2684         return NULL;
2685 
2686     if (expr1->kind == Tuple_kind && n_children < 4 &&
2687         (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2688          asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2689         /* Backwards compatibility: passing exec args as a tuple */
2690         globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2691         if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2692             locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2693         }
2694         expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2695     }
2696 
2697     if (n_children >= 4) {
2698         globals = ast_for_expr(c, CHILD(n, 3));
2699         if (!globals)
2700             return NULL;
2701     }
2702     if (n_children == 6) {
2703         locals = ast_for_expr(c, CHILD(n, 5));
2704         if (!locals)
2705             return NULL;
2706     }
2707 
2708     return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2709                 c->c_arena);
2710 }
2711 
2712 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2713 ast_for_assert_stmt(struct compiling *c, const node *n)
2714 {
2715     /* assert_stmt: 'assert' test [',' test] */
2716     REQ(n, assert_stmt);
2717     if (NCH(n) == 2) {
2718         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2719         if (!expression)
2720             return NULL;
2721         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2722                       c->c_arena);
2723     }
2724     else if (NCH(n) == 4) {
2725         expr_ty expr1, expr2;
2726 
2727         expr1 = ast_for_expr(c, CHILD(n, 1));
2728         if (!expr1)
2729             return NULL;
2730         expr2 = ast_for_expr(c, CHILD(n, 3));
2731         if (!expr2)
2732             return NULL;
2733 
2734         return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2735     }
2736     PyErr_Format(PyExc_SystemError,
2737                  "improper number of parts to 'assert' statement: %d",
2738                  NCH(n));
2739     return NULL;
2740 }
2741 
2742 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2743 ast_for_suite(struct compiling *c, const node *n)
2744 {
2745     /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2746     asdl_seq *seq;
2747     stmt_ty s;
2748     int i, total, num, end, pos = 0;
2749     node *ch;
2750 
2751     REQ(n, suite);
2752 
2753     total = num_stmts(n);
2754     seq = asdl_seq_new(total, c->c_arena);
2755     if (!seq)
2756         return NULL;
2757     if (TYPE(CHILD(n, 0)) == simple_stmt) {
2758         n = CHILD(n, 0);
2759         /* simple_stmt always ends with a NEWLINE,
2760            and may have a trailing SEMI
2761         */
2762         end = NCH(n) - 1;
2763         if (TYPE(CHILD(n, end - 1)) == SEMI)
2764             end--;
2765         /* loop by 2 to skip semi-colons */
2766         for (i = 0; i < end; i += 2) {
2767             ch = CHILD(n, i);
2768             s = ast_for_stmt(c, ch);
2769             if (!s)
2770                 return NULL;
2771             asdl_seq_SET(seq, pos++, s);
2772         }
2773     }
2774     else {
2775         for (i = 2; i < (NCH(n) - 1); i++) {
2776             ch = CHILD(n, i);
2777             REQ(ch, stmt);
2778             num = num_stmts(ch);
2779             if (num == 1) {
2780                 /* small_stmt or compound_stmt with only one child */
2781                 s = ast_for_stmt(c, ch);
2782                 if (!s)
2783                     return NULL;
2784                 asdl_seq_SET(seq, pos++, s);
2785             }
2786             else {
2787                 int j;
2788                 ch = CHILD(ch, 0);
2789                 REQ(ch, simple_stmt);
2790                 for (j = 0; j < NCH(ch); j += 2) {
2791                     /* statement terminates with a semi-colon ';' */
2792                     if (NCH(CHILD(ch, j)) == 0) {
2793                         assert((j + 1) == NCH(ch));
2794                         break;
2795                     }
2796                     s = ast_for_stmt(c, CHILD(ch, j));
2797                     if (!s)
2798                         return NULL;
2799                     asdl_seq_SET(seq, pos++, s);
2800                 }
2801             }
2802         }
2803     }
2804     assert(pos == seq->size);
2805     return seq;
2806 }
2807 
2808 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2809 ast_for_if_stmt(struct compiling *c, const node *n)
2810 {
2811     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2812        ['else' ':' suite]
2813     */
2814     char *s;
2815 
2816     REQ(n, if_stmt);
2817 
2818     if (NCH(n) == 4) {
2819         expr_ty expression;
2820         asdl_seq *suite_seq;
2821 
2822         expression = ast_for_expr(c, CHILD(n, 1));
2823         if (!expression)
2824             return NULL;
2825         suite_seq = ast_for_suite(c, CHILD(n, 3));
2826         if (!suite_seq)
2827             return NULL;
2828 
2829         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2830                   c->c_arena);
2831     }
2832 
2833     s = STR(CHILD(n, 4));
2834     /* s[2], the third character in the string, will be
2835        's' for el_s_e, or
2836        'i' for el_i_f
2837     */
2838     if (s[2] == 's') {
2839         expr_ty expression;
2840         asdl_seq *seq1, *seq2;
2841 
2842         expression = ast_for_expr(c, CHILD(n, 1));
2843         if (!expression)
2844             return NULL;
2845         seq1 = ast_for_suite(c, CHILD(n, 3));
2846         if (!seq1)
2847             return NULL;
2848         seq2 = ast_for_suite(c, CHILD(n, 6));
2849         if (!seq2)
2850             return NULL;
2851 
2852         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2853                   c->c_arena);
2854     }
2855     else if (s[2] == 'i') {
2856         int i, n_elif, has_else = 0;
2857         expr_ty expression;
2858         asdl_seq *suite_seq;
2859         asdl_seq *orelse = NULL;
2860         n_elif = NCH(n) - 4;
2861         /* must reference the child n_elif+1 since 'else' token is third,
2862            not fourth, child from the end. */
2863         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2864             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2865             has_else = 1;
2866             n_elif -= 3;
2867         }
2868         n_elif /= 4;
2869 
2870         if (has_else) {
2871             asdl_seq *suite_seq2;
2872 
2873             orelse = asdl_seq_new(1, c->c_arena);
2874             if (!orelse)
2875                 return NULL;
2876             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2877             if (!expression)
2878                 return NULL;
2879             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2880             if (!suite_seq)
2881                 return NULL;
2882             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2883             if (!suite_seq2)
2884                 return NULL;
2885 
2886             asdl_seq_SET(orelse, 0,
2887                          If(expression, suite_seq, suite_seq2,
2888                             LINENO(CHILD(n, NCH(n) - 6)),
2889                             CHILD(n, NCH(n) - 6)->n_col_offset,
2890                             c->c_arena));
2891             /* the just-created orelse handled the last elif */
2892             n_elif--;
2893         }
2894 
2895         for (i = 0; i < n_elif; i++) {
2896             int off = 5 + (n_elif - i - 1) * 4;
2897             asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2898             if (!newobj)
2899                 return NULL;
2900             expression = ast_for_expr(c, CHILD(n, off));
2901             if (!expression)
2902                 return NULL;
2903             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2904             if (!suite_seq)
2905                 return NULL;
2906 
2907             asdl_seq_SET(newobj, 0,
2908                          If(expression, suite_seq, orelse,
2909                             LINENO(CHILD(n, off)),
2910                             CHILD(n, off)->n_col_offset, c->c_arena));
2911             orelse = newobj;
2912         }
2913         expression = ast_for_expr(c, CHILD(n, 1));
2914         if (!expression)
2915             return NULL;
2916         suite_seq = ast_for_suite(c, CHILD(n, 3));
2917         if (!suite_seq)
2918             return NULL;
2919         return If(expression, suite_seq, orelse,
2920                   LINENO(n), n->n_col_offset, c->c_arena);
2921     }
2922 
2923     PyErr_Format(PyExc_SystemError,
2924                  "unexpected token in 'if' statement: %s", s);
2925     return NULL;
2926 }
2927 
2928 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)2929 ast_for_while_stmt(struct compiling *c, const node *n)
2930 {
2931     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2932     REQ(n, while_stmt);
2933 
2934     if (NCH(n) == 4) {
2935         expr_ty expression;
2936         asdl_seq *suite_seq;
2937 
2938         expression = ast_for_expr(c, CHILD(n, 1));
2939         if (!expression)
2940             return NULL;
2941         suite_seq = ast_for_suite(c, CHILD(n, 3));
2942         if (!suite_seq)
2943             return NULL;
2944         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2945                      c->c_arena);
2946     }
2947     else if (NCH(n) == 7) {
2948         expr_ty expression;
2949         asdl_seq *seq1, *seq2;
2950 
2951         expression = ast_for_expr(c, CHILD(n, 1));
2952         if (!expression)
2953             return NULL;
2954         seq1 = ast_for_suite(c, CHILD(n, 3));
2955         if (!seq1)
2956             return NULL;
2957         seq2 = ast_for_suite(c, CHILD(n, 6));
2958         if (!seq2)
2959             return NULL;
2960 
2961         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2962                      c->c_arena);
2963     }
2964 
2965     PyErr_Format(PyExc_SystemError,
2966                  "wrong number of tokens for 'while' statement: %d",
2967                  NCH(n));
2968     return NULL;
2969 }
2970 
2971 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)2972 ast_for_for_stmt(struct compiling *c, const node *n)
2973 {
2974     asdl_seq *_target, *seq = NULL, *suite_seq;
2975     expr_ty expression;
2976     expr_ty target, first;
2977     const node *node_target;
2978     /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2979     REQ(n, for_stmt);
2980 
2981     if (NCH(n) == 9) {
2982         seq = ast_for_suite(c, CHILD(n, 8));
2983         if (!seq)
2984             return NULL;
2985     }
2986 
2987     node_target = CHILD(n, 1);
2988     _target = ast_for_exprlist(c, node_target, Store);
2989     if (!_target)
2990         return NULL;
2991     /* Check the # of children rather than the length of _target, since
2992        for x, in ... has 1 element in _target, but still requires a Tuple. */
2993     first = (expr_ty)asdl_seq_GET(_target, 0);
2994     if (NCH(node_target) == 1)
2995         target = first;
2996     else
2997         target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
2998 
2999     expression = ast_for_testlist(c, CHILD(n, 3));
3000     if (!expression)
3001         return NULL;
3002     suite_seq = ast_for_suite(c, CHILD(n, 5));
3003     if (!suite_seq)
3004         return NULL;
3005 
3006     return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
3007                c->c_arena);
3008 }
3009 
3010 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3011 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3012 {
3013     /* except_clause: 'except' [test [(',' | 'as') test]] */
3014     REQ(exc, except_clause);
3015     REQ(body, suite);
3016 
3017     if (NCH(exc) == 1) {
3018         asdl_seq *suite_seq = ast_for_suite(c, body);
3019         if (!suite_seq)
3020             return NULL;
3021 
3022         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3023                              exc->n_col_offset, c->c_arena);
3024     }
3025     else if (NCH(exc) == 2) {
3026         expr_ty expression;
3027         asdl_seq *suite_seq;
3028 
3029         expression = ast_for_expr(c, CHILD(exc, 1));
3030         if (!expression)
3031             return NULL;
3032         suite_seq = ast_for_suite(c, body);
3033         if (!suite_seq)
3034             return NULL;
3035 
3036         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3037                              exc->n_col_offset, c->c_arena);
3038     }
3039     else if (NCH(exc) == 4) {
3040         asdl_seq *suite_seq;
3041         expr_ty expression;
3042         expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3043         if (!e)
3044             return NULL;
3045         if (!set_context(c, e, Store, CHILD(exc, 3)))
3046             return NULL;
3047         expression = ast_for_expr(c, CHILD(exc, 1));
3048         if (!expression)
3049             return NULL;
3050         suite_seq = ast_for_suite(c, body);
3051         if (!suite_seq)
3052             return NULL;
3053 
3054         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3055                              exc->n_col_offset, c->c_arena);
3056     }
3057 
3058     PyErr_Format(PyExc_SystemError,
3059                  "wrong number of children for 'except' clause: %d",
3060                  NCH(exc));
3061     return NULL;
3062 }
3063 
3064 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3065 ast_for_try_stmt(struct compiling *c, const node *n)
3066 {
3067     const int nch = NCH(n);
3068     int n_except = (nch - 3)/3;
3069     asdl_seq *body, *orelse = NULL, *finally = NULL;
3070 
3071     REQ(n, try_stmt);
3072 
3073     body = ast_for_suite(c, CHILD(n, 2));
3074     if (body == NULL)
3075         return NULL;
3076 
3077     if (TYPE(CHILD(n, nch - 3)) == NAME) {
3078         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3079             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3080                 /* we can assume it's an "else",
3081                    because nch >= 9 for try-else-finally and
3082                    it would otherwise have a type of except_clause */
3083                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3084                 if (orelse == NULL)
3085                     return NULL;
3086                 n_except--;
3087             }
3088 
3089             finally = ast_for_suite(c, CHILD(n, nch - 1));
3090             if (finally == NULL)
3091                 return NULL;
3092             n_except--;
3093         }
3094         else {
3095             /* we can assume it's an "else",
3096                otherwise it would have a type of except_clause */
3097             orelse = ast_for_suite(c, CHILD(n, nch - 1));
3098             if (orelse == NULL)
3099                 return NULL;
3100             n_except--;
3101         }
3102     }
3103     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3104         ast_error(n, "malformed 'try' statement");
3105         return NULL;
3106     }
3107 
3108     if (n_except > 0) {
3109         int i;
3110         stmt_ty except_st;
3111         /* process except statements to create a try ... except */
3112         asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3113         if (handlers == NULL)
3114             return NULL;
3115 
3116         for (i = 0; i < n_except; i++) {
3117             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3118                                                        CHILD(n, 5 + i * 3));
3119             if (!e)
3120                 return NULL;
3121             asdl_seq_SET(handlers, i, e);
3122         }
3123 
3124         except_st = TryExcept(body, handlers, orelse, LINENO(n),
3125                               n->n_col_offset, c->c_arena);
3126         if (!finally)
3127             return except_st;
3128 
3129         /* if a 'finally' is present too, we nest the TryExcept within a
3130            TryFinally to emulate try ... except ... finally */
3131         body = asdl_seq_new(1, c->c_arena);
3132         if (body == NULL)
3133             return NULL;
3134         asdl_seq_SET(body, 0, except_st);
3135     }
3136 
3137     /* must be a try ... finally (except clauses are in body, if any exist) */
3138     assert(finally != NULL);
3139     return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3140 }
3141 
3142 /* with_item: test ['as' expr] */
3143 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content)3144 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
3145 {
3146     expr_ty context_expr, optional_vars = NULL;
3147 
3148     REQ(n, with_item);
3149     context_expr = ast_for_expr(c, CHILD(n, 0));
3150     if (!context_expr)
3151         return NULL;
3152     if (NCH(n) == 3) {
3153         optional_vars = ast_for_expr(c, CHILD(n, 2));
3154 
3155         if (!optional_vars) {
3156             return NULL;
3157         }
3158         if (!set_context(c, optional_vars, Store, n)) {
3159             return NULL;
3160         }
3161     }
3162 
3163     return With(context_expr, optional_vars, content, LINENO(n),
3164                 n->n_col_offset, c->c_arena);
3165 }
3166 
3167 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3168 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3169 ast_for_with_stmt(struct compiling *c, const node *n)
3170 {
3171     int i;
3172     stmt_ty ret;
3173     asdl_seq *inner;
3174 
3175     REQ(n, with_stmt);
3176 
3177     /* process the with items inside-out */
3178     i = NCH(n) - 1;
3179     /* the suite of the innermost with item is the suite of the with stmt */
3180     inner = ast_for_suite(c, CHILD(n, i));
3181     if (!inner)
3182         return NULL;
3183 
3184     for (;;) {
3185         i -= 2;
3186         ret = ast_for_with_item(c, CHILD(n, i), inner);
3187         if (!ret)
3188             return NULL;
3189         /* was this the last item? */
3190         if (i == 1)
3191             break;
3192         /* if not, wrap the result so far in a new sequence */
3193         inner = asdl_seq_new(1, c->c_arena);
3194         if (!inner)
3195             return NULL;
3196         asdl_seq_SET(inner, 0, ret);
3197     }
3198 
3199     return ret;
3200 }
3201 
3202 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3203 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3204 {
3205     /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3206     PyObject *classname;
3207     asdl_seq *bases, *s;
3208 
3209     REQ(n, classdef);
3210 
3211     if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3212             return NULL;
3213 
3214     if (NCH(n) == 4) {
3215         s = ast_for_suite(c, CHILD(n, 3));
3216         if (!s)
3217             return NULL;
3218         classname = NEW_IDENTIFIER(CHILD(n, 1));
3219         if (!classname)
3220             return NULL;
3221         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3222                         n->n_col_offset, c->c_arena);
3223     }
3224     /* check for empty base list */
3225     if (TYPE(CHILD(n,3)) == RPAR) {
3226         s = ast_for_suite(c, CHILD(n,5));
3227         if (!s)
3228             return NULL;
3229         classname = NEW_IDENTIFIER(CHILD(n, 1));
3230         if (!classname)
3231             return NULL;
3232         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3233                         n->n_col_offset, c->c_arena);
3234     }
3235 
3236     /* else handle the base class list */
3237     bases = ast_for_class_bases(c, CHILD(n, 3));
3238     if (!bases)
3239         return NULL;
3240 
3241     s = ast_for_suite(c, CHILD(n, 6));
3242     if (!s)
3243         return NULL;
3244     classname = NEW_IDENTIFIER(CHILD(n, 1));
3245     if (!classname)
3246         return NULL;
3247     return ClassDef(classname, bases, s, decorator_seq,
3248                     LINENO(n), n->n_col_offset, c->c_arena);
3249 }
3250 
3251 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3252 ast_for_stmt(struct compiling *c, const node *n)
3253 {
3254     if (TYPE(n) == stmt) {
3255         assert(NCH(n) == 1);
3256         n = CHILD(n, 0);
3257     }
3258     if (TYPE(n) == simple_stmt) {
3259         assert(num_stmts(n) == 1);
3260         n = CHILD(n, 0);
3261     }
3262     if (TYPE(n) == small_stmt) {
3263         n = CHILD(n, 0);
3264         /* small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt
3265                      | flow_stmt | import_stmt | global_stmt | exec_stmt
3266                      | assert_stmt
3267         */
3268         switch (TYPE(n)) {
3269             case expr_stmt:
3270                 return ast_for_expr_stmt(c, n);
3271             case print_stmt:
3272                 return ast_for_print_stmt(c, n);
3273             case del_stmt:
3274                 return ast_for_del_stmt(c, n);
3275             case pass_stmt:
3276                 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3277             case flow_stmt:
3278                 return ast_for_flow_stmt(c, n);
3279             case import_stmt:
3280                 return ast_for_import_stmt(c, n);
3281             case global_stmt:
3282                 return ast_for_global_stmt(c, n);
3283             case exec_stmt:
3284                 return ast_for_exec_stmt(c, n);
3285             case assert_stmt:
3286                 return ast_for_assert_stmt(c, n);
3287             default:
3288                 PyErr_Format(PyExc_SystemError,
3289                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3290                              TYPE(n), NCH(n));
3291                 return NULL;
3292         }
3293     }
3294     else {
3295         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3296                         | funcdef | classdef | decorated
3297         */
3298         node *ch = CHILD(n, 0);
3299         REQ(n, compound_stmt);
3300         switch (TYPE(ch)) {
3301             case if_stmt:
3302                 return ast_for_if_stmt(c, ch);
3303             case while_stmt:
3304                 return ast_for_while_stmt(c, ch);
3305             case for_stmt:
3306                 return ast_for_for_stmt(c, ch);
3307             case try_stmt:
3308                 return ast_for_try_stmt(c, ch);
3309             case with_stmt:
3310                 return ast_for_with_stmt(c, ch);
3311             case funcdef:
3312                 return ast_for_funcdef(c, ch, NULL);
3313             case classdef:
3314                 return ast_for_classdef(c, ch, NULL);
3315             case decorated:
3316                 return ast_for_decorated(c, ch);
3317             default:
3318                 PyErr_Format(PyExc_SystemError,
3319                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3320                              TYPE(n), NCH(n));
3321                 return NULL;
3322         }
3323     }
3324 }
3325 
3326 static PyObject *
parsenumber(struct compiling * c,const char * s)3327 parsenumber(struct compiling *c, const char *s)
3328 {
3329         const char *end;
3330         long x;
3331         double dx;
3332 #ifndef WITHOUT_COMPLEX
3333         Py_complex complex;
3334         int imflag;
3335 #endif
3336 
3337         assert(s != NULL);
3338         errno = 0;
3339         end = s + strlen(s) - 1;
3340 #ifndef WITHOUT_COMPLEX
3341         imflag = *end == 'j' || *end == 'J';
3342 #endif
3343         if (*end == 'l' || *end == 'L')
3344                 return PyLong_FromString((char *)s, (char **)0, 0);
3345         x = PyOS_strtol((char *)s, (char **)&end, 0);
3346         if (*end == '\0') {
3347                 if (errno != 0)
3348                         return PyLong_FromString((char *)s, (char **)0, 0);
3349                 return PyInt_FromLong(x);
3350         }
3351         /* XXX Huge floats may silently fail */
3352 #ifndef WITHOUT_COMPLEX
3353         if (imflag) {
3354                 complex.real = 0.;
3355                 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3356                 if (complex.imag == -1.0 && PyErr_Occurred())
3357                         return NULL;
3358                 return PyComplex_FromCComplex(complex);
3359         }
3360         else
3361 #endif
3362         {
3363                 dx = PyOS_string_to_double(s, NULL, NULL);
3364                 if (dx == -1.0 && PyErr_Occurred())
3365                         return NULL;
3366                 return PyFloat_FromDouble(dx);
3367         }
3368 }
3369 
3370 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end,char * encoding)3371 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3372 {
3373 #ifndef Py_USING_UNICODE
3374         Py_FatalError("decode_utf8 should not be called in this build.");
3375         return NULL;
3376 #else
3377         PyObject *u, *v;
3378         char *s, *t;
3379         t = s = (char *)*sPtr;
3380         /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3381         while (s < end && (*s & 0x80)) s++;
3382         *sPtr = s;
3383         u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3384         if (u == NULL)
3385                 return NULL;
3386         v = PyUnicode_AsEncodedString(u, encoding, NULL);
3387         Py_DECREF(u);
3388         return v;
3389 #endif
3390 }
3391 
#ifdef Py_USING_UNICODE
/* Decode the body of a unicode literal (s, len) into a unicode object.
 *
 * When `encoding` is set and is not "iso-8859-1", the text is first rewritten
 * into a temporary buffer in which every run of bytes with the high bit set
 * is decoded as UTF-8 and replaced by \UXXXXXXXX escapes, and a backslash
 * directly followed by such a byte is spelled \u005c.  The result (or the
 * untouched input otherwise) is then passed to the raw or regular
 * unicode-escape decoder, depending on `rawmode`.  Returns NULL with an
 * exception set on failure.
 */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
        PyObject *v;
        PyObject *u = NULL;
        char *buf;
        char *p;
        const char *end;
        if (encoding != NULL && strcmp(encoding, "iso-8859-1")) {
                /* check for integer overflow; report it as an allocation
                   failure so that NULL is never returned without an
                   exception set */
                if (len > PY_SIZE_MAX / 6)
                        return PyErr_NoMemory();
                /* Worst-case growth: a 2-byte UTF-8 sequence may become
                   "\U000000E4" (10 bytes), or 1:5; a backslash followed by
                   such a sequence (3 bytes) may become
                   "\u005c\U000000E4" (16 bytes), or ~1:6 */
                u = PyString_FromStringAndSize((char *)NULL, len * 6);
                if (u == NULL)
                        return NULL;
                p = buf = PyString_AsString(u);
                end = s + len;
                while (s < end) {
                        if (*s == '\\') {
                                *p++ = *s++;
                                /* do not look at *s once the input is
                                   exhausted; a trailing backslash is
                                   spelled out as \u005c as well */
                                if (s >= end || (*s & 0x80)) {
                                        strcpy(p, "u005c");
                                        p += 5;
                                        if (s >= end)
                                                break;
                                }
                        }
                        if (*s & 0x80) { /* XXX inefficient */
                                PyObject *w;
                                char *r;
                                Py_ssize_t rn, i;
                                /* decode_utf8 advances s past the run */
                                w = decode_utf8(c, &s, end, "utf-32-be");
                                if (w == NULL) {
                                        Py_DECREF(u);
                                        return NULL;
                                }
                                r = PyString_AsString(w);
                                rn = PyString_Size(w);
                                assert(rn % 4 == 0);
                                /* one \UXXXXXXXX escape per 4-byte unit */
                                for (i = 0; i < rn; i += 4) {
                                        sprintf(p, "\\U%02x%02x%02x%02x",
                                                r[i + 0] & 0xFF,
                                                r[i + 1] & 0xFF,
                                                r[i + 2] & 0xFF,
                                                r[i + 3] & 0xFF);
                                        p += 10;
                                }
                                Py_DECREF(w);
                        } else {
                                *p++ = *s++;
                        }
                }
                /* from here on decode the rewritten text */
                len = p - buf;
                s = buf;
        }
        if (rawmode)
                v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
        else
                v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
        Py_XDECREF(u);
        return v;
}
#endif
3456 
3457 /* s is a Python string literal, including the bracketing quote characters,
3458  * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3459  * parsestr parses it, and returns the decoded Python string object.
3460  */
3461 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3462 parsestr(struct compiling *c, const node *n, const char *s)
3463 {
3464         size_t len, i;
3465         int quote = Py_CHARMASK(*s);
3466         int rawmode = 0;
3467         int need_encoding;
3468         int unicode = c->c_future_unicode;
3469         int bytes = 0;
3470 
3471         if (isalpha(quote) || quote == '_') {
3472                 if (quote == 'u' || quote == 'U') {
3473                         quote = *++s;
3474                         unicode = 1;
3475                 }
3476                 if (quote == 'b' || quote == 'B') {
3477                         quote = *++s;
3478                         unicode = 0;
3479                         bytes = 1;
3480                 }
3481                 if (quote == 'r' || quote == 'R') {
3482                         quote = *++s;
3483                         rawmode = 1;
3484                 }
3485         }
3486         if (quote != '\'' && quote != '\"') {
3487                 PyErr_BadInternalCall();
3488                 return NULL;
3489         }
3490         s++;
3491         len = strlen(s);
3492         if (len > INT_MAX) {
3493                 PyErr_SetString(PyExc_OverflowError,
3494                                 "string to parse is too long");
3495                 return NULL;
3496         }
3497         if (s[--len] != quote) {
3498                 PyErr_BadInternalCall();
3499                 return NULL;
3500         }
3501         if (len >= 4 && s[0] == quote && s[1] == quote) {
3502                 s += 2;
3503                 len -= 2;
3504                 if (s[--len] != quote || s[--len] != quote) {
3505                         PyErr_BadInternalCall();
3506                         return NULL;
3507                 }
3508         }
3509         if (Py_Py3kWarningFlag && bytes) {
3510             for (i = 0; i < len; i++) {
3511                 if ((unsigned char)s[i] > 127) {
3512                     if (!ast_warn(c, n,
3513                         "non-ascii bytes literals not supported in 3.x"))
3514                         return NULL;
3515                     break;
3516                 }
3517             }
3518         }
3519 #ifdef Py_USING_UNICODE
3520         if (unicode || Py_UnicodeFlag) {
3521                 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3522         }
3523 #endif
3524         need_encoding = (c->c_encoding != NULL &&
3525                          strcmp(c->c_encoding, "utf-8") != 0 &&
3526                          strcmp(c->c_encoding, "iso-8859-1") != 0);
3527         if (rawmode || strchr(s, '\\') == NULL) {
3528                 if (need_encoding) {
3529 #ifndef Py_USING_UNICODE
3530                         /* This should not happen - we never see any other
3531                            encoding. */
3532                         Py_FatalError(
3533                             "cannot deal with encodings in this build.");
3534 #else
3535                         PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3536                         if (u == NULL)
3537                                 return NULL;
3538                         v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3539                         Py_DECREF(u);
3540                         return v;
3541 #endif
3542                 } else {
3543                         return PyString_FromStringAndSize(s, len);
3544                 }
3545         }
3546 
3547         return PyString_DecodeEscape(s, len, NULL, unicode,
3548                                      need_encoding ? c->c_encoding : NULL);
3549 }
3550 
3551 /* Build a Python string object out of a STRING atom.  This takes care of
3552  * compile-time literal catenation, calling parsestr() on each piece, and
3553  * pasting the intermediate results together.
3554  */
3555 static PyObject *
parsestrplus(struct compiling * c,const node * n)3556 parsestrplus(struct compiling *c, const node *n)
3557 {
3558         PyObject *v;
3559         int i;
3560         REQ(CHILD(n, 0), STRING);
3561         if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3562                 /* String literal concatenation */
3563                 for (i = 1; i < NCH(n); i++) {
3564                         PyObject *s;
3565                         s = parsestr(c, n, STR(CHILD(n, i)));
3566                         if (s == NULL)
3567                                 goto onError;
3568                         if (PyString_Check(v) && PyString_Check(s)) {
3569                                 PyString_ConcatAndDel(&v, s);
3570                                 if (v == NULL)
3571                                     goto onError;
3572                         }
3573 #ifdef Py_USING_UNICODE
3574                         else {
3575                                 PyObject *temp = PyUnicode_Concat(v, s);
3576                                 Py_DECREF(s);
3577                                 Py_DECREF(v);
3578                                 v = temp;
3579                                 if (v == NULL)
3580                                     goto onError;
3581                         }
3582 #endif
3583                 }
3584         }
3585         return v;
3586 
3587  onError:
3588         Py_XDECREF(v);
3589         return NULL;
3590 }
3591