1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST).  The main function is Ta27AST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "../Include/Python-ast.h"
8 #include "../Include/grammar.h"
9 #include "../Include/node.h"
10 #include "../Include/ast.h"
11 #include "../Include/token.h"
12 #include "../Include/parsetok.h"
13 #include "../Include/graminit.h"
14 #include "unicodeobject.h"
15 
16 #include <assert.h>
17 
/* Data structure used internally */
/* Conversion state that is passed (as `c`) to the helper routines in this
   file while a CST is being turned into an AST. */
struct compiling {
    char *c_encoding; /* source encoding */
    int c_future_unicode; /* __future__ unicode literals flag */
    PyArena *c_arena; /* arena for allocating memory */
    const char *c_filename; /* filename */
};
25 
/* Forward declarations of the conversion helpers, so that they can call
   each other regardless of definition order. */
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
static expr_ty ast_for_expr(struct compiling *, const node *);
static stmt_ty ast_for_stmt(struct compiling *, const node *);
static asdl_seq *ast_for_suite(struct compiling *, const node *);
static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
                                  expr_context_ty);
static expr_ty ast_for_testlist(struct compiling *, const node *);
static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
static expr_ty ast_for_testlist_comp(struct compiling *, const node *);

/* Note different signature for ast_for_call */
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);

/* Literal parsing helpers (numbers and strings). */
static PyObject *parsenumber(struct compiling *, const char *);
static PyObject *parsestr(struct compiling *, const node *n, const char *);
static PyObject *parsestrplus(struct compiling *, const node *n);

/* File-local flags, both initialised to 0.  Py_Py3kWarningFlag gates the
   "... in 3.x" warnings issued through ast_warn(). */
static int Py_Py3kWarningFlag = 0;
static int Py_UnicodeFlag = 0;

/* Provided by another translation unit. */
extern long Ta27OS_strtol(char *str, char **ptr, int base);

/* Line number recorded on a CST node, unless a header already defined it. */
#ifndef LINENO
#define LINENO(n)       ((n)->n_lineno)
#endif

/* Comprehension kind selectors -- presumably consumed by the comprehension
   helpers further down the file; TODO confirm. */
#define COMP_GENEXP 0
#define COMP_SETCOMP  1
54 
55 static identifier
new_identifier(const char * n,PyArena * arena)56 new_identifier(const char* n, PyArena *arena) {
57     PyObject* id = PyUnicode_InternFromString(n);
58     if (id != NULL)
59         PyArena_AddPyObject(arena, id);
60     return id;
61 }
62 
63 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
64 
65 static string
new_type_comment(const char * s,struct compiling * c)66 new_type_comment(const char *s, struct compiling *c)
67 {
68   return PyUnicode_DecodeUTF8(s, strlen(s), NULL);
69 }
70 #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
71 
72 
73 /* This routine provides an invalid object for the syntax error.
74    The outermost routine must unpack this error and create the
75    proper object.  We do this so that we don't have to pass
76    the filename to everything function.
77 
78    XXX Maybe we should just pass the filename...
79 */
80 
81 static int
ast_error(const node * n,const char * errstr)82 ast_error(const node *n, const char *errstr)
83 {
84     PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
85     if (!u)
86         return 0;
87     PyErr_SetObject(PyExc_SyntaxError, u);
88     Py_DECREF(u);
89     return 0;
90 }
91 
92 static void
ast_error_finish(const char * filename)93 ast_error_finish(const char *filename)
94 {
95     PyObject *type, *value, *tback, *errstr, *loc, *tmp;
96     long lineno;
97 
98     assert(PyErr_Occurred());
99     if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
100         return;
101 
102     PyErr_Fetch(&type, &value, &tback);
103     errstr = PyTuple_GetItem(value, 0);
104     if (!errstr)
105         return;
106     Py_INCREF(errstr);
107     lineno = PyLong_AsLong(PyTuple_GetItem(value, 1));
108     if (lineno == -1) {
109         Py_DECREF(errstr);
110         return;
111     }
112     Py_DECREF(value);
113 
114     loc = PyErr_ProgramText(filename, lineno);
115     if (!loc) {
116         Py_INCREF(Py_None);
117         loc = Py_None;
118     }
119     tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
120     Py_DECREF(loc);
121     if (!tmp) {
122         Py_DECREF(errstr);
123         return;
124     }
125     value = PyTuple_Pack(2, errstr, tmp);
126     Py_DECREF(errstr);
127     Py_DECREF(tmp);
128     if (!value)
129         return;
130     PyErr_Restore(type, value, tback);
131 }
132 
133 static int
ast_warn(struct compiling * c,const node * n,char * msg)134 ast_warn(struct compiling *c, const node *n, char *msg)
135 {
136     if (PyErr_WarnExplicit(PyExc_SyntaxWarning, msg, c->c_filename, LINENO(n),
137                            NULL, NULL) < 0) {
138         /* if -Werr, change it to a SyntaxError */
139         if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxWarning))
140             ast_error(n, msg);
141         return 0;
142     }
143     return 1;
144 }
145 
146 static int
forbidden_check(struct compiling * c,const node * n,const char * x)147 forbidden_check(struct compiling *c, const node *n, const char *x)
148 {
149     if (!strcmp(x, "None"))
150         return ast_error(n, "cannot assign to None");
151     if (!strcmp(x, "__debug__"))
152         return ast_error(n, "cannot assign to __debug__");
153     if (Py_Py3kWarningFlag) {
154         if (!(strcmp(x, "True") && strcmp(x, "False")) &&
155             !ast_warn(c, n, "assignment to True or False is forbidden in 3.x"))
156             return 0;
157         if (!strcmp(x, "nonlocal") &&
158             !ast_warn(c, n, "nonlocal is a keyword in 3.x"))
159             return 0;
160     }
161     return 1;
162 }
163 
164 /* num_stmts() returns number of contained statements.
165 
166    Use this routine to determine how big a sequence is needed for
167    the statements in a parse tree.  Its raison d'etre is this bit of
168    grammar:
169 
170    stmt: simple_stmt | compound_stmt
171    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
172 
173    A simple_stmt can contain multiple small_stmt elements joined
174    by semicolons.  If the arg is a simple_stmt, the number of
175    small_stmt elements is returned.
176 */
177 
178 static int
num_stmts(const node * n)179 num_stmts(const node *n)
180 {
181     int i, l;
182     node *ch;
183 
184     switch (TYPE(n)) {
185         case single_input:
186             if (TYPE(CHILD(n, 0)) == NEWLINE)
187                 return 0;
188             else
189                 return num_stmts(CHILD(n, 0));
190         case file_input:
191             l = 0;
192             for (i = 0; i < NCH(n); i++) {
193                 ch = CHILD(n, i);
194                 if (TYPE(ch) == stmt)
195                     l += num_stmts(ch);
196             }
197             return l;
198         case stmt:
199             return num_stmts(CHILD(n, 0));
200         case compound_stmt:
201             return 1;
202         case simple_stmt:
203             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
204         case suite:
205             /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
206             if (NCH(n) == 1)
207                 return num_stmts(CHILD(n, 0));
208             else {
209                 i = 2;
210                 l = 0;
211                 if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
212                     i += 2;
213                 for (; i < (NCH(n) - 1); i++)
214                     l += num_stmts(CHILD(n, i));
215                 return l;
216             }
217         default: {
218             char buf[128];
219 
220             sprintf(buf, "Non-statement found: %d %d",
221                     TYPE(n), NCH(n));
222             Py_FatalError(buf);
223         }
224     }
225     assert(0);
226     return 0;
227 }
228 
229 /* Transform the CST rooted at node * to the appropriate AST
230 */
231 
232 mod_ty
Ta27AST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename,PyArena * arena)233 Ta27AST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
234                PyArena *arena)
235 {
236     int i, j, k, num;
237     asdl_seq *stmts = NULL;
238     asdl_seq *type_ignores = NULL;
239     stmt_ty s;
240     node *ch;
241     struct compiling c;
242     asdl_seq *argtypes = NULL;
243     expr_ty ret, arg;
244 
245     if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
246         c.c_encoding = "utf-8";
247         if (TYPE(n) == encoding_decl) {
248             ast_error(n, "encoding declaration in Unicode string");
249             goto error;
250         }
251     } else if (TYPE(n) == encoding_decl) {
252         c.c_encoding = STR(n);
253         n = CHILD(n, 0);
254     } else {
255         c.c_encoding = NULL;
256     }
257     c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
258     c.c_arena = arena;
259     c.c_filename = filename;
260 
261     k = 0;
262     switch (TYPE(n)) {
263         case file_input:
264             stmts = asdl_seq_new(num_stmts(n), arena);
265             if (!stmts)
266                 return NULL;
267             for (i = 0; i < NCH(n) - 1; i++) {
268                 ch = CHILD(n, i);
269                 if (TYPE(ch) == NEWLINE)
270                     continue;
271                 REQ(ch, stmt);
272                 num = num_stmts(ch);
273                 if (num == 1) {
274                     s = ast_for_stmt(&c, ch);
275                     if (!s)
276                         goto error;
277                     asdl_seq_SET(stmts, k++, s);
278                 }
279                 else {
280                     ch = CHILD(ch, 0);
281                     REQ(ch, simple_stmt);
282                     for (j = 0; j < num; j++) {
283                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
284                         if (!s)
285                             goto error;
286                         asdl_seq_SET(stmts, k++, s);
287                     }
288                 }
289             }
290             /* Type ignores are stored under the ENDMARKER in file_input. */
291             ch = CHILD(n, NCH(n) - 1);
292             REQ(ch, ENDMARKER);
293             num = NCH(ch);
294             type_ignores = _Ta27_asdl_seq_new(num, arena);
295             if (!type_ignores)
296                 goto error;
297 
298             for (i = 0; i < num; i++) {
299                 string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
300                 if (!type_comment)
301                     goto error;
302                 type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
303                 if (!ti)
304                     goto error;
305                 asdl_seq_SET(type_ignores, i, ti);
306             }
307 
308             return Module(stmts, type_ignores, arena);
309         case eval_input: {
310             expr_ty testlist_ast;
311 
312             /* XXX Why not comp_for here? */
313             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
314             if (!testlist_ast)
315                 goto error;
316             return Expression(testlist_ast, arena);
317         }
318         case single_input:
319             if (TYPE(CHILD(n, 0)) == NEWLINE) {
320                 stmts = asdl_seq_new(1, arena);
321                 if (!stmts)
322                     goto error;
323                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
324                                             arena));
325                 if (!asdl_seq_GET(stmts, 0))
326                     goto error;
327                 return Interactive(stmts, arena);
328             }
329             else {
330                 n = CHILD(n, 0);
331                 num = num_stmts(n);
332                 stmts = asdl_seq_new(num, arena);
333                 if (!stmts)
334                     goto error;
335                 if (num == 1) {
336                     s = ast_for_stmt(&c, n);
337                     if (!s)
338                         goto error;
339                     asdl_seq_SET(stmts, 0, s);
340                 }
341                 else {
342                     /* Only a simple_stmt can contain multiple statements. */
343                     REQ(n, simple_stmt);
344                     for (i = 0; i < NCH(n); i += 2) {
345                         if (TYPE(CHILD(n, i)) == NEWLINE)
346                             break;
347                         s = ast_for_stmt(&c, CHILD(n, i));
348                         if (!s)
349                             goto error;
350                         asdl_seq_SET(stmts, i / 2, s);
351                     }
352                 }
353 
354                 return Interactive(stmts, arena);
355             }
356         case func_type_input:
357             n = CHILD(n, 0);
358             REQ(n, func_type);
359 
360             if (TYPE(CHILD(n, 1)) == typelist) {
361                 ch = CHILD(n, 1);
362                 /* this is overly permissive -- we don't pay any attention to
363                  * stars on the args -- just parse them into an ordered list */
364                 num = 0;
365                 for (i = 0; i < NCH(ch); i++) {
366                     if (TYPE(CHILD(ch, i)) == test)
367                         num++;
368                 }
369 
370                 argtypes = _Ta27_asdl_seq_new(num, arena);
371 
372                 j = 0;
373                 for (i = 0; i < NCH(ch); i++) {
374                     if (TYPE(CHILD(ch, i)) == test) {
375                         arg = ast_for_expr(&c, CHILD(ch, i));
376                         if (!arg)
377                             goto error;
378                         asdl_seq_SET(argtypes, j++, arg);
379                     }
380                 }
381             }
382             else
383                 argtypes = _Ta27_asdl_seq_new(0, arena);
384 
385             ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
386             if (!ret)
387                 goto error;
388             return FunctionType(argtypes, ret, arena);
389         default:
390             PyErr_Format(PyExc_SystemError,
391                          "invalid node %d for Ta27AST_FromNode", TYPE(n));
392             goto error;
393     }
394  error:
395     ast_error_finish(filename);
396     return NULL;
397 }
398 
399 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
400 */
401 
402 static operator_ty
get_operator(const node * n)403 get_operator(const node *n)
404 {
405     switch (TYPE(n)) {
406         case VBAR:
407             return BitOr;
408         case CIRCUMFLEX:
409             return BitXor;
410         case AMPER:
411             return BitAnd;
412         case LEFTSHIFT:
413             return LShift;
414         case RIGHTSHIFT:
415             return RShift;
416         case PLUS:
417             return Add;
418         case MINUS:
419             return Sub;
420         case STAR:
421             return Mult;
422         case SLASH:
423             return Div;
424         case DOUBLESLASH:
425             return FloorDiv;
426         case PERCENT:
427             return Mod;
428         default:
429             return (operator_ty)0;
430     }
431 }
432 
433 /* Set the context ctx for expr_ty e, recursively traversing e.
434 
435    Only sets context for expr kinds that "can appear in assignment context"
436    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
437    an appropriate syntax error and returns false.
438 */
439 
440 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)441 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
442 {
443     asdl_seq *s = NULL;
444     /* If a particular expression type can't be used for assign / delete,
445        set expr_name to its name and an error message will be generated.
446     */
447     const char* expr_name = NULL;
448 
449     /* The ast defines augmented store and load contexts, but the
450        implementation here doesn't actually use them.  The code may be
451        a little more complex than necessary as a result.  It also means
452        that expressions in an augmented assignment have a Store context.
453        Consider restructuring so that augmented assignment uses
454        set_context(), too.
455     */
456     assert(ctx != AugStore && ctx != AugLoad);
457 
458     switch (e->kind) {
459         case Attribute_kind:
460             if (ctx == Store && !forbidden_check(c, n,
461                                 PyUnicode_AsUTF8(e->v.Attribute.attr)))
462                     return 0;
463             e->v.Attribute.ctx = ctx;
464             break;
465         case Subscript_kind:
466             e->v.Subscript.ctx = ctx;
467             break;
468         case Name_kind:
469             if (ctx == Store && !forbidden_check(c, n,
470                                 PyUnicode_AsUTF8(e->v.Name.id)))
471                     return 0;
472             e->v.Name.ctx = ctx;
473             break;
474         case List_kind:
475             e->v.List.ctx = ctx;
476             s = e->v.List.elts;
477             break;
478         case Tuple_kind:
479             if (asdl_seq_LEN(e->v.Tuple.elts))  {
480                 e->v.Tuple.ctx = ctx;
481                 s = e->v.Tuple.elts;
482             }
483             else {
484                 expr_name = "()";
485             }
486             break;
487         case Lambda_kind:
488             expr_name = "lambda";
489             break;
490         case Call_kind:
491             expr_name = "function call";
492             break;
493         case BoolOp_kind:
494         case BinOp_kind:
495         case UnaryOp_kind:
496             expr_name = "operator";
497             break;
498         case GeneratorExp_kind:
499             expr_name = "generator expression";
500             break;
501         case Yield_kind:
502             expr_name = "yield expression";
503             break;
504         case ListComp_kind:
505             expr_name = "list comprehension";
506             break;
507         case SetComp_kind:
508             expr_name = "set comprehension";
509             break;
510         case DictComp_kind:
511             expr_name = "dict comprehension";
512             break;
513         case Dict_kind:
514         case Set_kind:
515         case Num_kind:
516         case Str_kind:
517             expr_name = "literal";
518             break;
519         case Compare_kind:
520             expr_name = "comparison";
521             break;
522         case Repr_kind:
523             expr_name = "repr";
524             break;
525         case IfExp_kind:
526             expr_name = "conditional expression";
527             break;
528         default:
529             PyErr_Format(PyExc_SystemError,
530                          "unexpected expression in assignment %d (line %d)",
531                          e->kind, e->lineno);
532             return 0;
533     }
534     /* Check for error string set by switch */
535     if (expr_name) {
536         char buf[300];
537         PyOS_snprintf(buf, sizeof(buf),
538                       "can't %s %s",
539                       ctx == Store ? "assign to" : "delete",
540                       expr_name);
541         return ast_error(n, buf);
542     }
543 
544     /* If the LHS is a list or tuple, we need to set the assignment
545        context for all the contained elements.
546     */
547     if (s) {
548         int i;
549 
550         for (i = 0; i < asdl_seq_LEN(s); i++) {
551             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
552                 return 0;
553         }
554     }
555     return 1;
556 }
557 
558 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)559 ast_for_augassign(struct compiling *c, const node *n)
560 {
561     REQ(n, augassign);
562     n = CHILD(n, 0);
563     switch (STR(n)[0]) {
564         case '+':
565             return Add;
566         case '-':
567             return Sub;
568         case '/':
569             if (STR(n)[1] == '/')
570                 return FloorDiv;
571             else
572                 return Div;
573         case '%':
574             return Mod;
575         case '<':
576             return LShift;
577         case '>':
578             return RShift;
579         case '&':
580             return BitAnd;
581         case '^':
582             return BitXor;
583         case '|':
584             return BitOr;
585         case '*':
586             if (STR(n)[1] == '*')
587                 return Pow;
588             else
589                 return Mult;
590         default:
591             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
592             return (operator_ty)0;
593     }
594 }
595 
596 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)597 ast_for_comp_op(struct compiling *c, const node *n)
598 {
599     /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
600                |'is' 'not'
601     */
602     REQ(n, comp_op);
603     if (NCH(n) == 1) {
604         n = CHILD(n, 0);
605         switch (TYPE(n)) {
606             case LESS:
607                 return Lt;
608             case GREATER:
609                 return Gt;
610             case EQEQUAL:                       /* == */
611                 return Eq;
612             case LESSEQUAL:
613                 return LtE;
614             case GREATEREQUAL:
615                 return GtE;
616             case NOTEQUAL:
617                 return NotEq;
618             case NAME:
619                 if (strcmp(STR(n), "in") == 0)
620                     return In;
621                 if (strcmp(STR(n), "is") == 0)
622                     return Is;
623             default:
624                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
625                              STR(n));
626                 return (cmpop_ty)0;
627         }
628     }
629     else if (NCH(n) == 2) {
630         /* handle "not in" and "is not" */
631         switch (TYPE(CHILD(n, 0))) {
632             case NAME:
633                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
634                     return NotIn;
635                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
636                     return IsNot;
637             default:
638                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
639                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
640                 return (cmpop_ty)0;
641         }
642     }
643     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
644                  NCH(n));
645     return (cmpop_ty)0;
646 }
647 
648 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)649 seq_for_testlist(struct compiling *c, const node *n)
650 {
651     /* testlist: test (',' test)* [','] */
652     asdl_seq *seq;
653     expr_ty expression;
654     int i;
655     assert(TYPE(n) == testlist ||
656            TYPE(n) == listmaker ||
657            TYPE(n) == testlist_comp ||
658            TYPE(n) == testlist_safe ||
659            TYPE(n) == testlist1);
660 
661     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
662     if (!seq)
663         return NULL;
664 
665     for (i = 0; i < NCH(n); i += 2) {
666         assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
667 
668         expression = ast_for_expr(c, CHILD(n, i));
669         if (!expression)
670             return NULL;
671 
672         assert(i / 2 < seq->size);
673         asdl_seq_SET(seq, i / 2, expression);
674     }
675     return seq;
676 }
677 
678 static expr_ty
compiler_complex_args(struct compiling * c,const node * n)679 compiler_complex_args(struct compiling *c, const node *n)
680 {
681     int i, len = (NCH(n) + 1) / 2;
682     expr_ty result;
683     asdl_seq *args = asdl_seq_new(len, c->c_arena);
684     if (!args)
685         return NULL;
686 
687     /* fpdef: NAME | '(' fplist ')'
688        fplist: fpdef (',' fpdef)* [',']
689     */
690     REQ(n, fplist);
691     for (i = 0; i < len; i++) {
692         PyObject *arg_id;
693         const node *fpdef_node = CHILD(n, 2*i);
694         const node *child;
695         expr_ty arg;
696 set_name:
697         /* fpdef_node is either a NAME or an fplist */
698         child = CHILD(fpdef_node, 0);
699         if (TYPE(child) == NAME) {
700             if (!forbidden_check(c, n, STR(child)))
701                 return NULL;
702             arg_id = NEW_IDENTIFIER(child);
703             if (!arg_id)
704                 return NULL;
705             arg = Name(arg_id, Store, LINENO(child), child->n_col_offset,
706                        c->c_arena);
707         }
708         else {
709             assert(TYPE(fpdef_node) == fpdef);
710             /* fpdef_node[0] is not a name, so it must be '(', get CHILD[1] */
711             child = CHILD(fpdef_node, 1);
712             assert(TYPE(child) == fplist);
713             /* NCH == 1 means we have (x), we need to elide the extra parens */
714             if (NCH(child) == 1) {
715                 fpdef_node = CHILD(child, 0);
716                 assert(TYPE(fpdef_node) == fpdef);
717                 goto set_name;
718             }
719             arg = compiler_complex_args(c, child);
720         }
721         asdl_seq_SET(args, i, arg);
722     }
723 
724     result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
725     if (!set_context(c, result, Store, n))
726         return NULL;
727     return result;
728 }
729 
730 
731 /* Create AST for argument list. */
732 
733 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)734 ast_for_arguments(struct compiling *c, const node *n)
735 {
736     /* parameters: '(' [varargslist] ')'
737        varargslist: ((fpdef ['=' test] ',' [TYPE_COMMENT])*
738                      ('*' NAME [',' [TYPE_COMMENT]  '**' NAME] [TYPE_COMMENT] | '**' NAME [TYPE_COMMENT]) |
739                      fpdef ['=' test] (',' [TYPE_COMMENT] fpdef ['=' test])* [','] [TYPE_COMMENT])
740     */
741     int i, j, k, l, n_args = 0, n_all_args = 0, n_defaults = 0, found_default = 0;
742     asdl_seq *args, *defaults, *type_comments = NULL;
743     identifier vararg = NULL, kwarg = NULL;
744     node *ch;
745 
746     if (TYPE(n) == parameters) {
747         if (NCH(n) == 2) /* () as argument list */
748             return arguments(NULL, NULL, NULL, NULL, NULL, c->c_arena);
749         n = CHILD(n, 1);
750     }
751     REQ(n, varargslist);
752 
753     /* first count the number of normal args & defaults */
754     for (i = 0; i < NCH(n); i++) {
755         ch = CHILD(n, i);
756         if (TYPE(ch) == fpdef)
757             n_args++;
758         if (TYPE(ch) == EQUAL)
759             n_defaults++;
760         if (TYPE(ch) == STAR || TYPE(ch) == DOUBLESTAR)
761             n_all_args++;
762     }
763     n_all_args += n_args;
764     args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
765     if (!args && n_args)
766         return NULL;
767     defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
768     if (!defaults && n_defaults)
769         return NULL;
770     /* type_comments will be lazily initialized if needed.  If there are no
771        per-argument type comments, it will remain NULL.  Otherwise, it will be
772        an asdl_seq with length equal to the number of args (including varargs
773        and kwargs, if present) and with members set to the string of each arg's
774        type comment, if present, or NULL otherwise.
775      */
776 
777     /* fpdef: NAME | '(' fplist ')'
778        fplist: fpdef (',' fpdef)* [',']
779     */
780     i = 0;
781     j = 0;  /* index for defaults */
782     k = 0;  /* index for args */
783     l = 0;  /* index for type comments */
784     while (i < NCH(n)) {
785         ch = CHILD(n, i);
786         switch (TYPE(ch)) {
787             case fpdef: {
788                 int complex_args = 0, parenthesized = 0;
789             handle_fpdef:
790                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
791                    anything other than EQUAL or a comma? */
792                 /* XXX Should NCH(n) check be made a separate check? */
793                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
794                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
795                     if (!expression)
796                         return NULL;
797                     assert(defaults != NULL);
798                     asdl_seq_SET(defaults, j++, expression);
799                     i += 2;
800                     found_default = 1;
801                 }
802                 else if (found_default) {
803                     /* def f((x)=4): pass should raise an error.
804                        def f((x, (y))): pass will just incur the tuple unpacking warning. */
805                     if (parenthesized && !complex_args) {
806                         ast_error(n, "parenthesized arg with default");
807                         return NULL;
808                     }
809                     ast_error(n,
810                              "non-default argument follows default argument");
811                     return NULL;
812                 }
813                 if (NCH(ch) == 3) {
814                     ch = CHILD(ch, 1);
815                     /* def foo((x)): is not complex, special case. */
816                     if (NCH(ch) != 1) {
817                         /* We have complex arguments, setup for unpacking. */
818                         if (Py_Py3kWarningFlag && !ast_warn(c, ch,
819                             "tuple parameter unpacking has been removed in 3.x"))
820                             return NULL;
821                         complex_args = 1;
822                         asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
823                         if (!asdl_seq_GET(args, k-1))
824                                 return NULL;
825                     } else {
826                         /* def foo((x)): setup for checking NAME below. */
827                         /* Loop because there can be many parens and tuple
828                            unpacking mixed in. */
829                         parenthesized = 1;
830                         ch = CHILD(ch, 0);
831                         assert(TYPE(ch) == fpdef);
832                         goto handle_fpdef;
833                     }
834                 }
835                 if (TYPE(CHILD(ch, 0)) == NAME) {
836                     PyObject *id;
837                     expr_ty name;
838                     if (!forbidden_check(c, n, STR(CHILD(ch, 0))))
839                         return NULL;
840                     id = NEW_IDENTIFIER(CHILD(ch, 0));
841                     if (!id)
842                         return NULL;
843                     name = Name(id, Param, LINENO(ch), ch->n_col_offset,
844                                 c->c_arena);
845                     if (!name)
846                         return NULL;
847                     asdl_seq_SET(args, k++, name);
848 
849                 }
850                 i += 1; /* the name */
851                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
852                     i += 1; /* the comma, if present */
853                 if (parenthesized && Py_Py3kWarningFlag &&
854                     !ast_warn(c, ch, "parenthesized argument names "
855                               "are invalid in 3.x"))
856                     return NULL;
857 
858                 break;
859             }
860             case STAR:
861                 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
862                     return NULL;
863                 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
864                 if (!vararg)
865                     return NULL;
866                 i += 2; /* the star and the name */
867                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
868                     i += 1; /* the comma, if present */
869                 break;
870             case DOUBLESTAR:
871                 if (!forbidden_check(c, CHILD(n, i+1), STR(CHILD(n, i+1))))
872                     return NULL;
873                 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
874                 if (!kwarg)
875                     return NULL;
876                 i += 2; /* the double star and the name */
877                 if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
878                     i += 1; /* the comma, if present */
879                 break;
880             case TYPE_COMMENT:
881                 assert(l < k + !!vararg + !!kwarg);
882 
883                 if (!type_comments) {
884                     /* lazily allocate the type_comments seq for perf reasons */
885                     type_comments = asdl_seq_new(n_all_args, c->c_arena);
886                     if (!type_comments)
887                         return NULL;
888                 }
889 
890                 while (l < k + !!vararg + !!kwarg - 1) {
891                     asdl_seq_SET(type_comments, l++, NULL);
892                 }
893 
894                 asdl_seq_SET(type_comments, l++, NEW_TYPE_COMMENT(ch));
895                 i += 1;
896                 break;
897             default:
898                 PyErr_Format(PyExc_SystemError,
899                              "unexpected node in varargslist: %d @ %d",
900                              TYPE(ch), i);
901                 return NULL;
902         }
903     }
904 
905     if (type_comments) {
906         while (l < n_all_args) {
907             asdl_seq_SET(type_comments, l++, NULL);
908         }
909     }
910 
911     return arguments(args, vararg, kwarg, defaults, type_comments, c->c_arena);
912 }
913 
914 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)915 ast_for_dotted_name(struct compiling *c, const node *n)
916 {
917     expr_ty e;
918     identifier id;
919     int lineno, col_offset;
920     int i;
921 
922     REQ(n, dotted_name);
923 
924     lineno = LINENO(n);
925     col_offset = n->n_col_offset;
926 
927     id = NEW_IDENTIFIER(CHILD(n, 0));
928     if (!id)
929         return NULL;
930     e = Name(id, Load, lineno, col_offset, c->c_arena);
931     if (!e)
932         return NULL;
933 
934     for (i = 2; i < NCH(n); i+=2) {
935         id = NEW_IDENTIFIER(CHILD(n, i));
936         if (!id)
937             return NULL;
938         e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
939         if (!e)
940             return NULL;
941     }
942 
943     return e;
944 }
945 
946 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)947 ast_for_decorator(struct compiling *c, const node *n)
948 {
949     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
950     expr_ty d = NULL;
951     expr_ty name_expr;
952 
953     REQ(n, decorator);
954     REQ(CHILD(n, 0), AT);
955     REQ(RCHILD(n, -1), NEWLINE);
956 
957     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
958     if (!name_expr)
959         return NULL;
960 
961     if (NCH(n) == 3) { /* No arguments */
962         d = name_expr;
963         name_expr = NULL;
964     }
965     else if (NCH(n) == 5) { /* Call with no arguments */
966         d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
967                  n->n_col_offset, c->c_arena);
968         if (!d)
969             return NULL;
970         name_expr = NULL;
971     }
972     else {
973         d = ast_for_call(c, CHILD(n, 3), name_expr);
974         if (!d)
975             return NULL;
976         name_expr = NULL;
977     }
978 
979     return d;
980 }
981 
982 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)983 ast_for_decorators(struct compiling *c, const node *n)
984 {
985     asdl_seq* decorator_seq;
986     expr_ty d;
987     int i;
988 
989     REQ(n, decorators);
990     decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
991     if (!decorator_seq)
992         return NULL;
993 
994     for (i = 0; i < NCH(n); i++) {
995         d = ast_for_decorator(c, CHILD(n, i));
996         if (!d)
997             return NULL;
998         asdl_seq_SET(decorator_seq, i, d);
999     }
1000     return decorator_seq;
1001 }
1002 
1003 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1004 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1005 {
1006     /* funcdef: 'def' NAME parameters ':' [TYPE_COMMENT] suite */
1007     identifier name;
1008     arguments_ty args;
1009     asdl_seq *body;
1010     int name_i = 1;
1011     node *tc;
1012     string type_comment = NULL;
1013 
1014     REQ(n, funcdef);
1015 
1016     name = NEW_IDENTIFIER(CHILD(n, name_i));
1017     if (!name)
1018         return NULL;
1019     else if (!forbidden_check(c, CHILD(n, name_i), STR(CHILD(n, name_i))))
1020         return NULL;
1021     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1022     if (!args)
1023         return NULL;
1024     if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
1025         type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
1026         name_i += 1;
1027     }
1028     body = ast_for_suite(c, CHILD(n, name_i + 3));
1029     if (!body)
1030         return NULL;
1031 
1032     if (!type_comment && NCH(CHILD(n, name_i + 3)) > 1) {
1033         /* If the function doesn't have a type comment on the same line, check
1034          * if the suite has a type comment in it. */
1035         tc = CHILD(CHILD(n, name_i + 3), 1);
1036 
1037         if (TYPE(tc) == TYPE_COMMENT)
1038             type_comment = NEW_TYPE_COMMENT(tc);
1039     }
1040 
1041     return FunctionDef(name, args, body, decorator_seq, type_comment, LINENO(n),
1042                        n->n_col_offset, c->c_arena);
1043 }
1044 
1045 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1046 ast_for_decorated(struct compiling *c, const node *n)
1047 {
1048     /* decorated: decorators (classdef | funcdef) */
1049     stmt_ty thing = NULL;
1050     asdl_seq *decorator_seq = NULL;
1051 
1052     REQ(n, decorated);
1053 
1054     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1055     if (!decorator_seq)
1056       return NULL;
1057 
1058     assert(TYPE(CHILD(n, 1)) == funcdef ||
1059            TYPE(CHILD(n, 1)) == classdef);
1060 
1061     if (TYPE(CHILD(n, 1)) == funcdef) {
1062       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1063     } else if (TYPE(CHILD(n, 1)) == classdef) {
1064       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1065     }
1066     /* we count the decorators in when talking about the class' or
1067        function's line number */
1068     if (thing) {
1069         thing->lineno = LINENO(n);
1070         thing->col_offset = n->n_col_offset;
1071     }
1072     return thing;
1073 }
1074 
1075 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1076 ast_for_lambdef(struct compiling *c, const node *n)
1077 {
1078     /* lambdef: 'lambda' [varargslist] ':' test */
1079     arguments_ty args;
1080     expr_ty expression;
1081 
1082     if (NCH(n) == 3) {
1083         args = arguments(NULL, NULL, NULL, NULL, NULL, c->c_arena);
1084         if (!args)
1085             return NULL;
1086         expression = ast_for_expr(c, CHILD(n, 2));
1087         if (!expression)
1088             return NULL;
1089     }
1090     else {
1091         args = ast_for_arguments(c, CHILD(n, 1));
1092         if (!args)
1093             return NULL;
1094         expression = ast_for_expr(c, CHILD(n, 3));
1095         if (!expression)
1096             return NULL;
1097     }
1098 
1099     return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
1100 }
1101 
1102 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)1103 ast_for_ifexpr(struct compiling *c, const node *n)
1104 {
1105     /* test: or_test 'if' or_test 'else' test */
1106     expr_ty expression, body, orelse;
1107 
1108     assert(NCH(n) == 5);
1109     body = ast_for_expr(c, CHILD(n, 0));
1110     if (!body)
1111         return NULL;
1112     expression = ast_for_expr(c, CHILD(n, 2));
1113     if (!expression)
1114         return NULL;
1115     orelse = ast_for_expr(c, CHILD(n, 4));
1116     if (!orelse)
1117         return NULL;
1118     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1119                  c->c_arena);
1120 }
1121 
1122 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
1123    so there is only a single version.  Possibly for loops can also re-use
1124    the code.
1125 */
1126 
1127 /* Count the number of 'for' loop in a list comprehension.
1128 
1129    Helper for ast_for_listcomp().
1130 */
1131 
1132 static int
count_list_fors(struct compiling * c,const node * n)1133 count_list_fors(struct compiling *c, const node *n)
1134 {
1135     int n_fors = 0;
1136     node *ch = CHILD(n, 1);
1137 
1138  count_list_for:
1139     n_fors++;
1140     REQ(ch, list_for);
1141     if (NCH(ch) == 5)
1142         ch = CHILD(ch, 4);
1143     else
1144         return n_fors;
1145  count_list_iter:
1146     REQ(ch, list_iter);
1147     ch = CHILD(ch, 0);
1148     if (TYPE(ch) == list_for)
1149         goto count_list_for;
1150     else if (TYPE(ch) == list_if) {
1151         if (NCH(ch) == 3) {
1152             ch = CHILD(ch, 2);
1153             goto count_list_iter;
1154         }
1155         else
1156             return n_fors;
1157     }
1158 
1159     /* Should never be reached */
1160     PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
1161     return -1;
1162 }
1163 
1164 /* Count the number of 'if' statements in a list comprehension.
1165 
1166    Helper for ast_for_listcomp().
1167 */
1168 
1169 static int
count_list_ifs(struct compiling * c,const node * n)1170 count_list_ifs(struct compiling *c, const node *n)
1171 {
1172     int n_ifs = 0;
1173 
1174  count_list_iter:
1175     REQ(n, list_iter);
1176     if (TYPE(CHILD(n, 0)) == list_for)
1177         return n_ifs;
1178     n = CHILD(n, 0);
1179     REQ(n, list_if);
1180     n_ifs++;
1181     if (NCH(n) == 2)
1182         return n_ifs;
1183     n = CHILD(n, 2);
1184     goto count_list_iter;
1185 }
1186 
1187 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)1188 ast_for_listcomp(struct compiling *c, const node *n)
1189 {
1190     /* listmaker: test ( list_for | (',' test)* [','] )
1191        list_for: 'for' exprlist 'in' testlist_safe [list_iter]
1192        list_iter: list_for | list_if
1193        list_if: 'if' test [list_iter]
1194        testlist_safe: test [(',' test)+ [',']]
1195     */
1196     expr_ty elt, first;
1197     asdl_seq *listcomps;
1198     int i, n_fors;
1199     node *ch;
1200 
1201     REQ(n, listmaker);
1202     assert(NCH(n) > 1);
1203 
1204     elt = ast_for_expr(c, CHILD(n, 0));
1205     if (!elt)
1206         return NULL;
1207 
1208     n_fors = count_list_fors(c, n);
1209     if (n_fors == -1)
1210         return NULL;
1211 
1212     listcomps = asdl_seq_new(n_fors, c->c_arena);
1213     if (!listcomps)
1214         return NULL;
1215 
1216     ch = CHILD(n, 1);
1217     for (i = 0; i < n_fors; i++) {
1218         comprehension_ty lc;
1219         asdl_seq *t;
1220         expr_ty expression;
1221         node *for_ch;
1222 
1223         REQ(ch, list_for);
1224 
1225         for_ch = CHILD(ch, 1);
1226         t = ast_for_exprlist(c, for_ch, Store);
1227         if (!t)
1228             return NULL;
1229         expression = ast_for_testlist(c, CHILD(ch, 3));
1230         if (!expression)
1231             return NULL;
1232 
1233         /* Check the # of children rather than the length of t, since
1234            [x for x, in ... ] has 1 element in t, but still requires a Tuple.
1235         */
1236         first = (expr_ty)asdl_seq_GET(t, 0);
1237         if (NCH(for_ch) == 1)
1238             lc = comprehension(first, expression, NULL, c->c_arena);
1239         else
1240             lc = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1241                                      c->c_arena),
1242                                expression, NULL, c->c_arena);
1243         if (!lc)
1244             return NULL;
1245 
1246         if (NCH(ch) == 5) {
1247             int j, n_ifs;
1248             asdl_seq *ifs;
1249             expr_ty list_for_expr;
1250 
1251             ch = CHILD(ch, 4);
1252             n_ifs = count_list_ifs(c, ch);
1253             if (n_ifs == -1)
1254                 return NULL;
1255 
1256             ifs = asdl_seq_new(n_ifs, c->c_arena);
1257             if (!ifs)
1258                 return NULL;
1259 
1260             for (j = 0; j < n_ifs; j++) {
1261                 REQ(ch, list_iter);
1262                 ch = CHILD(ch, 0);
1263                 REQ(ch, list_if);
1264 
1265                 list_for_expr = ast_for_expr(c, CHILD(ch, 1));
1266                 if (!list_for_expr)
1267                     return NULL;
1268 
1269                 asdl_seq_SET(ifs, j, list_for_expr);
1270                 if (NCH(ch) == 3)
1271                     ch = CHILD(ch, 2);
1272             }
1273             /* on exit, must guarantee that ch is a list_for */
1274             if (TYPE(ch) == list_iter)
1275                 ch = CHILD(ch, 0);
1276             lc->ifs = ifs;
1277         }
1278         asdl_seq_SET(listcomps, i, lc);
1279     }
1280 
1281     return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1282 }
1283 
1284 /*
1285    Count the number of 'for' loops in a comprehension.
1286 
1287    Helper for ast_for_comprehension().
1288 */
1289 
1290 static int
count_comp_fors(struct compiling * c,const node * n)1291 count_comp_fors(struct compiling *c, const node *n)
1292 {
1293     int n_fors = 0;
1294 
1295   count_comp_for:
1296     n_fors++;
1297     REQ(n, comp_for);
1298     if (NCH(n) == 5)
1299         n = CHILD(n, 4);
1300     else
1301         return n_fors;
1302   count_comp_iter:
1303     REQ(n, comp_iter);
1304     n = CHILD(n, 0);
1305     if (TYPE(n) == comp_for)
1306         goto count_comp_for;
1307     else if (TYPE(n) == comp_if) {
1308         if (NCH(n) == 3) {
1309             n = CHILD(n, 2);
1310             goto count_comp_iter;
1311         }
1312         else
1313             return n_fors;
1314     }
1315 
1316     /* Should never be reached */
1317     PyErr_SetString(PyExc_SystemError,
1318                     "logic error in count_comp_fors");
1319     return -1;
1320 }
1321 
1322 /* Count the number of 'if' statements in a comprehension.
1323 
1324    Helper for ast_for_comprehension().
1325 */
1326 
1327 static int
count_comp_ifs(struct compiling * c,const node * n)1328 count_comp_ifs(struct compiling *c, const node *n)
1329 {
1330     int n_ifs = 0;
1331 
1332     while (1) {
1333         REQ(n, comp_iter);
1334         if (TYPE(CHILD(n, 0)) == comp_for)
1335             return n_ifs;
1336         n = CHILD(n, 0);
1337         REQ(n, comp_if);
1338         n_ifs++;
1339         if (NCH(n) == 2)
1340             return n_ifs;
1341         n = CHILD(n, 2);
1342     }
1343 }
1344 
1345 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1346 ast_for_comprehension(struct compiling *c, const node *n)
1347 {
1348     int i, n_fors;
1349     asdl_seq *comps;
1350 
1351     n_fors = count_comp_fors(c, n);
1352     if (n_fors == -1)
1353         return NULL;
1354 
1355     comps = asdl_seq_new(n_fors, c->c_arena);
1356     if (!comps)
1357         return NULL;
1358 
1359     for (i = 0; i < n_fors; i++) {
1360         comprehension_ty comp;
1361         asdl_seq *t;
1362         expr_ty expression, first;
1363         node *for_ch;
1364 
1365         REQ(n, comp_for);
1366 
1367         for_ch = CHILD(n, 1);
1368         t = ast_for_exprlist(c, for_ch, Store);
1369         if (!t)
1370             return NULL;
1371         expression = ast_for_expr(c, CHILD(n, 3));
1372         if (!expression)
1373             return NULL;
1374 
1375         /* Check the # of children rather than the length of t, since
1376            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1377         first = (expr_ty)asdl_seq_GET(t, 0);
1378         if (NCH(for_ch) == 1)
1379             comp = comprehension(first, expression, NULL, c->c_arena);
1380         else
1381             comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
1382                                      c->c_arena),
1383                                expression, NULL, c->c_arena);
1384         if (!comp)
1385             return NULL;
1386 
1387         if (NCH(n) == 5) {
1388             int j, n_ifs;
1389             asdl_seq *ifs;
1390 
1391             n = CHILD(n, 4);
1392             n_ifs = count_comp_ifs(c, n);
1393             if (n_ifs == -1)
1394                 return NULL;
1395 
1396             ifs = asdl_seq_new(n_ifs, c->c_arena);
1397             if (!ifs)
1398                 return NULL;
1399 
1400             for (j = 0; j < n_ifs; j++) {
1401                 REQ(n, comp_iter);
1402                 n = CHILD(n, 0);
1403                 REQ(n, comp_if);
1404 
1405                 expression = ast_for_expr(c, CHILD(n, 1));
1406                 if (!expression)
1407                     return NULL;
1408                 asdl_seq_SET(ifs, j, expression);
1409                 if (NCH(n) == 3)
1410                     n = CHILD(n, 2);
1411             }
1412             /* on exit, must guarantee that n is a comp_for */
1413             if (TYPE(n) == comp_iter)
1414                 n = CHILD(n, 0);
1415             comp->ifs = ifs;
1416         }
1417         asdl_seq_SET(comps, i, comp);
1418     }
1419     return comps;
1420 }
1421 
1422 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1423 ast_for_itercomp(struct compiling *c, const node *n, int type)
1424 {
1425     expr_ty elt;
1426     asdl_seq *comps;
1427 
1428     assert(NCH(n) > 1);
1429 
1430     elt = ast_for_expr(c, CHILD(n, 0));
1431     if (!elt)
1432         return NULL;
1433 
1434     comps = ast_for_comprehension(c, CHILD(n, 1));
1435     if (!comps)
1436         return NULL;
1437 
1438     if (type == COMP_GENEXP)
1439         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1440     else if (type == COMP_SETCOMP)
1441         return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1442     else
1443         /* Should never happen */
1444         return NULL;
1445 }
1446 
1447 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1448 ast_for_dictcomp(struct compiling *c, const node *n)
1449 {
1450     expr_ty key, value;
1451     asdl_seq *comps;
1452 
1453     assert(NCH(n) > 3);
1454     REQ(CHILD(n, 1), COLON);
1455 
1456     key = ast_for_expr(c, CHILD(n, 0));
1457     if (!key)
1458         return NULL;
1459 
1460     value = ast_for_expr(c, CHILD(n, 2));
1461     if (!value)
1462         return NULL;
1463 
1464     comps = ast_for_comprehension(c, CHILD(n, 3));
1465     if (!comps)
1466         return NULL;
1467 
1468     return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1469 }
1470 
1471 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)1472 ast_for_genexp(struct compiling *c, const node *n)
1473 {
1474     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1475     return ast_for_itercomp(c, n, COMP_GENEXP);
1476 }
1477 
1478 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)1479 ast_for_setcomp(struct compiling *c, const node *n)
1480 {
1481     assert(TYPE(n) == (dictorsetmaker));
1482     return ast_for_itercomp(c, n, COMP_SETCOMP);
1483 }
1484 
1485 static expr_ty
ast_for_atom(struct compiling * c,const node * n)1486 ast_for_atom(struct compiling *c, const node *n)
1487 {
1488     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']'
1489        | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1490     */
1491     node *ch = CHILD(n, 0);
1492 
1493     switch (TYPE(ch)) {
1494     case NAME: {
1495         /* All names start in Load context, but may later be
1496            changed. */
1497         PyObject *name = NEW_IDENTIFIER(ch);
1498         if (!name)
1499             return NULL;
1500         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1501     }
1502     case STRING: {
1503         PyObject *kind, *str = parsestrplus(c, n);
1504         const char *raw, *s = STR(CHILD(n, 0));
1505         /* currently Python allows up to 2 string modifiers */
1506         char *ch, s_kind[3] = {0, 0, 0};
1507         ch = s_kind;
1508         raw = s;
1509         while (*raw && *raw != '\'' && *raw != '"') {
1510             *ch++ = *raw++;
1511         }
1512         kind = PyUnicode_FromString(s_kind);
1513         if (!kind) {
1514             return NULL;
1515         }
1516         if (!str) {
1517 #ifdef Py_USING_UNICODE
1518             if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
1519                 PyObject *type, *value, *tback, *errstr;
1520                 PyErr_Fetch(&type, &value, &tback);
1521                 errstr = PyObject_Str(value);
1522                 if (errstr) {
1523                     const char *s = "";
1524                     char buf[128];
1525                     s = _PyUnicode_AsString(errstr);
1526                     PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1527                     ast_error(n, buf);
1528                     Py_DECREF(errstr);
1529                 } else {
1530                     ast_error(n, "(unicode error) unknown error");
1531                 }
1532                 Py_DECREF(type);
1533                 Py_DECREF(value);
1534                 Py_XDECREF(tback);
1535             }
1536 #endif
1537             return NULL;
1538         }
1539         PyArena_AddPyObject(c->c_arena, str);
1540         return Str(str, kind, LINENO(n), n->n_col_offset, c->c_arena);
1541     }
1542     case NUMBER: {
1543         PyObject *pynum = parsenumber(c, STR(ch));
1544         if (!pynum)
1545             return NULL;
1546 
1547         PyArena_AddPyObject(c->c_arena, pynum);
1548         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1549     }
1550     case LPAR: /* some parenthesized expressions */
1551         ch = CHILD(n, 1);
1552 
1553         if (TYPE(ch) == RPAR)
1554             return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1555 
1556         if (TYPE(ch) == yield_expr)
1557             return ast_for_expr(c, ch);
1558 
1559         return ast_for_testlist_comp(c, ch);
1560     case LSQB: /* list (or list comprehension) */
1561         ch = CHILD(n, 1);
1562 
1563         if (TYPE(ch) == RSQB)
1564             return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1565 
1566         REQ(ch, listmaker);
1567         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1568             asdl_seq *elts = seq_for_testlist(c, ch);
1569             if (!elts)
1570                 return NULL;
1571 
1572             return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1573         }
1574         else
1575             return ast_for_listcomp(c, ch);
1576     case LBRACE: {
1577         /* dictorsetmaker:
1578          *    (test ':' test (comp_for | (',' test ':' test)* [','])) |
1579          *    (test (comp_for | (',' test)* [',']))
1580          */
1581         int i, size;
1582         asdl_seq *keys, *values;
1583 
1584         ch = CHILD(n, 1);
1585         if (TYPE(ch) == RBRACE) {
1586             /* it's an empty dict */
1587             return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1588         } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1589             /* it's a simple set */
1590             asdl_seq *elts;
1591             size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1592             elts = asdl_seq_new(size, c->c_arena);
1593             if (!elts)
1594                 return NULL;
1595             for (i = 0; i < NCH(ch); i += 2) {
1596                 expr_ty expression;
1597                 expression = ast_for_expr(c, CHILD(ch, i));
1598                 if (!expression)
1599                     return NULL;
1600                 asdl_seq_SET(elts, i / 2, expression);
1601             }
1602             return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1603         } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1604             /* it's a set comprehension */
1605             return ast_for_setcomp(c, ch);
1606         } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1607             return ast_for_dictcomp(c, ch);
1608         } else {
1609             /* it's a dict */
1610             size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1611             keys = asdl_seq_new(size, c->c_arena);
1612             if (!keys)
1613                 return NULL;
1614 
1615             values = asdl_seq_new(size, c->c_arena);
1616             if (!values)
1617                 return NULL;
1618 
1619             for (i = 0; i < NCH(ch); i += 4) {
1620                 expr_ty expression;
1621 
1622                 expression = ast_for_expr(c, CHILD(ch, i));
1623                 if (!expression)
1624                     return NULL;
1625 
1626                 asdl_seq_SET(keys, i / 4, expression);
1627 
1628                 expression = ast_for_expr(c, CHILD(ch, i + 2));
1629                 if (!expression)
1630                     return NULL;
1631 
1632                 asdl_seq_SET(values, i / 4, expression);
1633             }
1634             return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1635         }
1636     }
1637     case BACKQUOTE: { /* repr */
1638         expr_ty expression;
1639         if (Py_Py3kWarningFlag &&
1640             !ast_warn(c, n, "backquote not supported in 3.x; use repr()"))
1641             return NULL;
1642         expression = ast_for_testlist(c, CHILD(n, 1));
1643         if (!expression)
1644             return NULL;
1645 
1646         return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1647     }
1648     default:
1649         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1650         return NULL;
1651     }
1652 }
1653 
1654 static slice_ty
ast_for_slice(struct compiling * c,const node * n)1655 ast_for_slice(struct compiling *c, const node *n)
1656 {
1657     node *ch;
1658     expr_ty lower = NULL, upper = NULL, step = NULL;
1659 
1660     REQ(n, subscript);
1661 
1662     /*
1663        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1664        sliceop: ':' [test]
1665     */
1666     ch = CHILD(n, 0);
1667     if (TYPE(ch) == DOT)
1668         return Ellipsis(c->c_arena);
1669 
1670     if (NCH(n) == 1 && TYPE(ch) == test) {
1671         /* 'step' variable hold no significance in terms of being used over
1672            other vars */
1673         step = ast_for_expr(c, ch);
1674         if (!step)
1675             return NULL;
1676 
1677         return Index(step, c->c_arena);
1678     }
1679 
1680     if (TYPE(ch) == test) {
1681         lower = ast_for_expr(c, ch);
1682         if (!lower)
1683             return NULL;
1684     }
1685 
1686     /* If there's an upper bound it's in the second or third position. */
1687     if (TYPE(ch) == COLON) {
1688         if (NCH(n) > 1) {
1689             node *n2 = CHILD(n, 1);
1690 
1691             if (TYPE(n2) == test) {
1692                 upper = ast_for_expr(c, n2);
1693                 if (!upper)
1694                     return NULL;
1695             }
1696         }
1697     } else if (NCH(n) > 2) {
1698         node *n2 = CHILD(n, 2);
1699 
1700         if (TYPE(n2) == test) {
1701             upper = ast_for_expr(c, n2);
1702             if (!upper)
1703                 return NULL;
1704         }
1705     }
1706 
1707     ch = CHILD(n, NCH(n) - 1);
1708     if (TYPE(ch) == sliceop) {
1709         if (NCH(ch) == 1) {
1710             /*
1711               This is an extended slice (ie "x[::]") with no expression in the
1712               step field. We set this literally to "None" in order to
1713               disambiguate it from x[:]. (The interpreter might have to call
1714               __getslice__ for x[:], but it must call __getitem__ for x[::].)
1715             */
1716             identifier none = new_identifier("None", c->c_arena);
1717             if (!none)
1718                 return NULL;
1719             ch = CHILD(ch, 0);
1720             step = Name(none, Load, LINENO(ch), ch->n_col_offset, c->c_arena);
1721             if (!step)
1722                 return NULL;
1723         } else {
1724             ch = CHILD(ch, 1);
1725             if (TYPE(ch) == test) {
1726                 step = ast_for_expr(c, ch);
1727                 if (!step)
1728                     return NULL;
1729             }
1730         }
1731     }
1732 
1733     return Slice(lower, upper, step, c->c_arena);
1734 }
1735 
1736 static expr_ty
ast_for_binop(struct compiling * c,const node * n)1737 ast_for_binop(struct compiling *c, const node *n)
1738 {
1739         /* Must account for a sequence of expressions.
1740            How should A op B op C by represented?
1741            BinOp(BinOp(A, op, B), op, C).
1742         */
1743 
1744         int i, nops;
1745         expr_ty expr1, expr2, result;
1746         operator_ty newoperator;
1747 
1748         expr1 = ast_for_expr(c, CHILD(n, 0));
1749         if (!expr1)
1750             return NULL;
1751 
1752         expr2 = ast_for_expr(c, CHILD(n, 2));
1753         if (!expr2)
1754             return NULL;
1755 
1756         newoperator = get_operator(CHILD(n, 1));
1757         if (!newoperator)
1758             return NULL;
1759 
1760         result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1761                        c->c_arena);
1762         if (!result)
1763             return NULL;
1764 
1765         nops = (NCH(n) - 1) / 2;
1766         for (i = 1; i < nops; i++) {
1767                 expr_ty tmp_result, tmp;
1768                 const node* next_oper = CHILD(n, i * 2 + 1);
1769 
1770                 newoperator = get_operator(next_oper);
1771                 if (!newoperator)
1772                     return NULL;
1773 
1774                 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1775                 if (!tmp)
1776                     return NULL;
1777 
1778                 tmp_result = BinOp(result, newoperator, tmp,
1779                                    LINENO(next_oper), next_oper->n_col_offset,
1780                                    c->c_arena);
1781                 if (!tmp_result)
1782                         return NULL;
1783                 result = tmp_result;
1784         }
1785         return result;
1786 }
1787 
1788 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)1789 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1790 {
1791     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1792        subscriptlist: subscript (',' subscript)* [',']
1793        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1794      */
1795     REQ(n, trailer);
1796     if (TYPE(CHILD(n, 0)) == LPAR) {
1797         if (NCH(n) == 2)
1798             return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1799                         n->n_col_offset, c->c_arena);
1800         else
1801             return ast_for_call(c, CHILD(n, 1), left_expr);
1802     }
1803     else if (TYPE(CHILD(n, 0)) == DOT ) {
1804         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1805         if (!attr_id)
1806             return NULL;
1807         return Attribute(left_expr, attr_id, Load,
1808                          LINENO(n), n->n_col_offset, c->c_arena);
1809     }
1810     else {
1811         REQ(CHILD(n, 0), LSQB);
1812         REQ(CHILD(n, 2), RSQB);
1813         n = CHILD(n, 1);
1814         if (NCH(n) == 1) {
1815             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1816             if (!slc)
1817                 return NULL;
1818             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1819                              c->c_arena);
1820         }
1821         else {
1822             /* The grammar is ambiguous here. The ambiguity is resolved
1823                by treating the sequence as a tuple literal if there are
1824                no slice features.
1825             */
1826             int j;
1827             slice_ty slc;
1828             expr_ty e;
1829             bool simple = true;
1830             asdl_seq *slices, *elts;
1831             slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1832             if (!slices)
1833                 return NULL;
1834             for (j = 0; j < NCH(n); j += 2) {
1835                 slc = ast_for_slice(c, CHILD(n, j));
1836                 if (!slc)
1837                     return NULL;
1838                 if (slc->kind != Index_kind)
1839                     simple = false;
1840                 asdl_seq_SET(slices, j / 2, slc);
1841             }
1842             if (!simple) {
1843                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1844                                  Load, LINENO(n), n->n_col_offset, c->c_arena);
1845             }
1846             /* extract Index values and put them in a Tuple */
1847             elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1848             if (!elts)
1849                 return NULL;
1850             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1851                 slc = (slice_ty)asdl_seq_GET(slices, j);
1852                 assert(slc->kind == Index_kind  && slc->v.Index.value);
1853                 asdl_seq_SET(elts, j, slc->v.Index.value);
1854             }
1855             e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1856             if (!e)
1857                 return NULL;
1858             return Subscript(left_expr, Index(e, c->c_arena),
1859                              Load, LINENO(n), n->n_col_offset, c->c_arena);
1860         }
1861     }
1862 }
1863 
1864 static expr_ty
ast_for_factor(struct compiling * c,const node * n)1865 ast_for_factor(struct compiling *c, const node *n)
1866 {
1867     node *pfactor, *ppower, *patom, *pnum;
1868     expr_ty expression;
1869 
1870     /* If the unary - operator is applied to a constant, don't generate
1871        a UNARY_NEGATIVE opcode.  Just store the approriate value as a
1872        constant.  The peephole optimizer already does something like
1873        this but it doesn't handle the case where the constant is
1874        (sys.maxint - 1).  In that case, we want a PyIntObject, not a
1875        PyLongObject.
1876     */
1877     if (TYPE(CHILD(n, 0)) == MINUS &&
1878         NCH(n) == 2 &&
1879         TYPE((pfactor = CHILD(n, 1))) == factor &&
1880         NCH(pfactor) == 1 &&
1881         TYPE((ppower = CHILD(pfactor, 0))) == power &&
1882         NCH(ppower) == 1 &&
1883         TYPE((patom = CHILD(ppower, 0))) == atom &&
1884         TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1885         PyObject *pynum;
1886         char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1887         if (s == NULL)
1888             return NULL;
1889         s[0] = '-';
1890         strcpy(s + 1, STR(pnum));
1891         pynum = parsenumber(c, s);
1892         PyObject_FREE(s);
1893         if (!pynum)
1894             return NULL;
1895 
1896         PyArena_AddPyObject(c->c_arena, pynum);
1897         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1898     }
1899 
1900     expression = ast_for_expr(c, CHILD(n, 1));
1901     if (!expression)
1902         return NULL;
1903 
1904     switch (TYPE(CHILD(n, 0))) {
1905         case PLUS:
1906             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1907                            c->c_arena);
1908         case MINUS:
1909             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1910                            c->c_arena);
1911         case TILDE:
1912             return UnaryOp(Invert, expression, LINENO(n),
1913                            n->n_col_offset, c->c_arena);
1914     }
1915     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1916                  TYPE(CHILD(n, 0)));
1917     return NULL;
1918 }
1919 
1920 static expr_ty
ast_for_power(struct compiling * c,const node * n)1921 ast_for_power(struct compiling *c, const node *n)
1922 {
1923     /* power: atom trailer* ('**' factor)*
1924      */
1925     int i;
1926     expr_ty e, tmp;
1927     REQ(n, power);
1928     e = ast_for_atom(c, CHILD(n, 0));
1929     if (!e)
1930         return NULL;
1931     if (NCH(n) == 1)
1932         return e;
1933     for (i = 1; i < NCH(n); i++) {
1934         node *ch = CHILD(n, i);
1935         if (TYPE(ch) != trailer)
1936             break;
1937         tmp = ast_for_trailer(c, ch, e);
1938         if (!tmp)
1939             return NULL;
1940         tmp->lineno = e->lineno;
1941         tmp->col_offset = e->col_offset;
1942         e = tmp;
1943     }
1944     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1945         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1946         if (!f)
1947             return NULL;
1948         tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1949         if (!tmp)
1950             return NULL;
1951         e = tmp;
1952     }
1953     return e;
1954 }
1955 
1956 /* Do not name a variable 'expr'!  Will cause a compile error.
1957 */
1958 
1959 static expr_ty
ast_for_expr(struct compiling * c,const node * n)1960 ast_for_expr(struct compiling *c, const node *n)
1961 {
1962     /* handle the full range of simple expressions
1963        test: or_test ['if' or_test 'else' test] | lambdef
1964        or_test: and_test ('or' and_test)*
1965        and_test: not_test ('and' not_test)*
1966        not_test: 'not' not_test | comparison
1967        comparison: expr (comp_op expr)*
1968        expr: xor_expr ('|' xor_expr)*
1969        xor_expr: and_expr ('^' and_expr)*
1970        and_expr: shift_expr ('&' shift_expr)*
1971        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1972        arith_expr: term (('+'|'-') term)*
1973        term: factor (('*'|'/'|'%'|'//') factor)*
1974        factor: ('+'|'-'|'~') factor | power
1975        power: atom trailer* ('**' factor)*
1976 
1977        As well as modified versions that exist for backward compatibility,
1978        to explicitly allow:
1979        [ x for x in lambda: 0, lambda: 1 ]
1980        (which would be ambiguous without these extra rules)
1981 
1982        old_test: or_test | old_lambdef
1983        old_lambdef: 'lambda' [vararglist] ':' old_test
1984 
1985     */
1986 
1987     asdl_seq *seq;
1988     int i;
1989 
1990  loop:
1991     switch (TYPE(n)) {
1992         case test:
1993         case old_test:
1994             if (TYPE(CHILD(n, 0)) == lambdef ||
1995                 TYPE(CHILD(n, 0)) == old_lambdef)
1996                 return ast_for_lambdef(c, CHILD(n, 0));
1997             else if (NCH(n) > 1)
1998                 return ast_for_ifexpr(c, n);
1999             /* Fallthrough */
2000         case or_test:
2001         case and_test:
2002             if (NCH(n) == 1) {
2003                 n = CHILD(n, 0);
2004                 goto loop;
2005             }
2006             seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2007             if (!seq)
2008                 return NULL;
2009             for (i = 0; i < NCH(n); i += 2) {
2010                 expr_ty e = ast_for_expr(c, CHILD(n, i));
2011                 if (!e)
2012                     return NULL;
2013                 asdl_seq_SET(seq, i / 2, e);
2014             }
2015             if (!strcmp(STR(CHILD(n, 1)), "and"))
2016                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2017                               c->c_arena);
2018             assert(!strcmp(STR(CHILD(n, 1)), "or"));
2019             return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
2020         case not_test:
2021             if (NCH(n) == 1) {
2022                 n = CHILD(n, 0);
2023                 goto loop;
2024             }
2025             else {
2026                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2027                 if (!expression)
2028                     return NULL;
2029 
2030                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2031                                c->c_arena);
2032             }
2033         case comparison:
2034             if (NCH(n) == 1) {
2035                 n = CHILD(n, 0);
2036                 goto loop;
2037             }
2038             else {
2039                 expr_ty expression;
2040                 asdl_int_seq *ops;
2041                 asdl_seq *cmps;
2042                 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2043                 if (!ops)
2044                     return NULL;
2045                 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
2046                 if (!cmps) {
2047                     return NULL;
2048                 }
2049                 for (i = 1; i < NCH(n); i += 2) {
2050                     cmpop_ty newoperator;
2051 
2052                     newoperator = ast_for_comp_op(c, CHILD(n, i));
2053                     if (!newoperator) {
2054                         return NULL;
2055                     }
2056 
2057                     expression = ast_for_expr(c, CHILD(n, i + 1));
2058                     if (!expression) {
2059                         return NULL;
2060                     }
2061 
2062                     asdl_seq_SET(ops, i / 2, newoperator);
2063                     asdl_seq_SET(cmps, i / 2, expression);
2064                 }
2065                 expression = ast_for_expr(c, CHILD(n, 0));
2066                 if (!expression) {
2067                     return NULL;
2068                 }
2069 
2070                 return Compare(expression, ops, cmps, LINENO(n),
2071                                n->n_col_offset, c->c_arena);
2072             }
2073             break;
2074 
2075         /* The next five cases all handle BinOps.  The main body of code
2076            is the same in each case, but the switch turned inside out to
2077            reuse the code for each type of operator.
2078          */
2079         case expr:
2080         case xor_expr:
2081         case and_expr:
2082         case shift_expr:
2083         case arith_expr:
2084         case term:
2085             if (NCH(n) == 1) {
2086                 n = CHILD(n, 0);
2087                 goto loop;
2088             }
2089             return ast_for_binop(c, n);
2090         case yield_expr: {
2091             expr_ty exp = NULL;
2092             if (NCH(n) == 2) {
2093                 exp = ast_for_testlist(c, CHILD(n, 1));
2094                 if (!exp)
2095                     return NULL;
2096             }
2097             return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
2098         }
2099         case factor:
2100             if (NCH(n) == 1) {
2101                 n = CHILD(n, 0);
2102                 goto loop;
2103             }
2104             return ast_for_factor(c, n);
2105         case power:
2106             return ast_for_power(c, n);
2107         default:
2108             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
2109             return NULL;
2110     }
2111     /* should never get here unless if error is set */
2112     return NULL;
2113 }
2114 
2115 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func)2116 ast_for_call(struct compiling *c, const node *n, expr_ty func)
2117 {
2118     /*
2119       arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
2120                | '**' test)
2121       argument: [test '='] test [comp_for]        # Really [keyword '='] test
2122     */
2123 
2124     int i, nargs, nkeywords, ngens;
2125     asdl_seq *args;
2126     asdl_seq *keywords;
2127     expr_ty vararg = NULL, kwarg = NULL;
2128 
2129     REQ(n, arglist);
2130 
2131     nargs = 0;
2132     nkeywords = 0;
2133     ngens = 0;
2134     for (i = 0; i < NCH(n); i++) {
2135         node *ch = CHILD(n, i);
2136         if (TYPE(ch) == argument) {
2137             if (NCH(ch) == 1)
2138                 nargs++;
2139             else if (TYPE(CHILD(ch, 1)) == comp_for)
2140                 ngens++;
2141             else
2142                 nkeywords++;
2143         }
2144     }
2145     if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2146         ast_error(n, "Generator expression must be parenthesized "
2147                   "if not sole argument");
2148         return NULL;
2149     }
2150 
2151     if (nargs + nkeywords + ngens > 255) {
2152       ast_error(n, "more than 255 arguments");
2153       return NULL;
2154     }
2155 
2156     args = asdl_seq_new(nargs + ngens, c->c_arena);
2157     if (!args)
2158         return NULL;
2159     keywords = asdl_seq_new(nkeywords, c->c_arena);
2160     if (!keywords)
2161         return NULL;
2162     nargs = 0;
2163     nkeywords = 0;
2164     for (i = 0; i < NCH(n); i++) {
2165         node *ch = CHILD(n, i);
2166         if (TYPE(ch) == argument) {
2167             expr_ty e;
2168             if (NCH(ch) == 1) {
2169                 if (nkeywords) {
2170                     ast_error(CHILD(ch, 0),
2171                               "non-keyword arg after keyword arg");
2172                     return NULL;
2173                 }
2174                 if (vararg) {
2175                     ast_error(CHILD(ch, 0),
2176                               "only named arguments may follow *expression");
2177                     return NULL;
2178                 }
2179                 e = ast_for_expr(c, CHILD(ch, 0));
2180                 if (!e)
2181                     return NULL;
2182                 asdl_seq_SET(args, nargs++, e);
2183             }
2184             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2185                 e = ast_for_genexp(c, ch);
2186                 if (!e)
2187                     return NULL;
2188                 asdl_seq_SET(args, nargs++, e);
2189             }
2190             else {
2191                 keyword_ty kw;
2192                 identifier key;
2193                 int k;
2194                 const char *tmp;
2195 
2196                 /* CHILD(ch, 0) is test, but must be an identifier? */
2197                 e = ast_for_expr(c, CHILD(ch, 0));
2198                 if (!e)
2199                     return NULL;
2200                 /* f(lambda x: x[0] = 3) ends up getting parsed with
2201                  * LHS test = lambda x: x[0], and RHS test = 3.
2202                  * SF bug 132313 points out that complaining about a keyword
2203                  * then is very confusing.
2204                  */
2205                 if (e->kind == Lambda_kind) {
2206                     ast_error(CHILD(ch, 0),
2207                               "lambda cannot contain assignment");
2208                     return NULL;
2209                 } else if (e->kind != Name_kind) {
2210                     ast_error(CHILD(ch, 0), "keyword can't be an expression");
2211                     return NULL;
2212                 }
2213                 key = e->v.Name.id;
2214                 if (!forbidden_check(c, CHILD(ch, 0), PyUnicode_AsUTF8(key)))
2215                     return NULL;
2216                 for (k = 0; k < nkeywords; k++) {
2217                     tmp = _PyUnicode_AsString(
2218                         ((keyword_ty)asdl_seq_GET(keywords, k))->arg);
2219                     if (!strcmp(tmp, _PyUnicode_AsString(key))) {
2220                         ast_error(CHILD(ch, 0), "keyword argument repeated");
2221                         return NULL;
2222                     }
2223                 }
2224                 e = ast_for_expr(c, CHILD(ch, 2));
2225                 if (!e)
2226                     return NULL;
2227                 kw = keyword(key, e, c->c_arena);
2228                 if (!kw)
2229                     return NULL;
2230                 asdl_seq_SET(keywords, nkeywords++, kw);
2231             }
2232         }
2233         else if (TYPE(ch) == STAR) {
2234             vararg = ast_for_expr(c, CHILD(n, i+1));
2235             if (!vararg)
2236                 return NULL;
2237             i++;
2238         }
2239         else if (TYPE(ch) == DOUBLESTAR) {
2240             kwarg = ast_for_expr(c, CHILD(n, i+1));
2241             if (!kwarg)
2242                 return NULL;
2243             i++;
2244         }
2245     }
2246 
2247     return Call(func, args, keywords, vararg, kwarg, func->lineno,
2248                 func->col_offset, c->c_arena);
2249 }
2250 
2251 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2252 ast_for_testlist(struct compiling *c, const node* n)
2253 {
2254     /* testlist_comp: test (',' test)* [','] */
2255     /* testlist: test (',' test)* [','] */
2256     /* testlist_safe: test (',' test)+ [','] */
2257     /* testlist1: test (',' test)* */
2258     assert(NCH(n) > 0);
2259     if (TYPE(n) == testlist_comp) {
2260         if (NCH(n) > 1)
2261             assert(TYPE(CHILD(n, 1)) != comp_for);
2262     }
2263     else {
2264         assert(TYPE(n) == testlist ||
2265                TYPE(n) == testlist_safe ||
2266                TYPE(n) == testlist1);
2267     }
2268     if (NCH(n) == 1)
2269         return ast_for_expr(c, CHILD(n, 0));
2270     else {
2271         asdl_seq *tmp = seq_for_testlist(c, n);
2272         if (!tmp)
2273             return NULL;
2274         return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2275     }
2276 }
2277 
2278 static expr_ty
ast_for_testlist_comp(struct compiling * c,const node * n)2279 ast_for_testlist_comp(struct compiling *c, const node* n)
2280 {
2281     /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2282     /* argument: test [ comp_for ] */
2283     assert(TYPE(n) == testlist_comp || TYPE(n) == argument);
2284     if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == comp_for)
2285         return ast_for_genexp(c, n);
2286     return ast_for_testlist(c, n);
2287 }
2288 
2289 /* like ast_for_testlist() but returns a sequence */
2290 static asdl_seq*
ast_for_class_bases(struct compiling * c,const node * n)2291 ast_for_class_bases(struct compiling *c, const node* n)
2292 {
2293     /* testlist: test (',' test)* [','] */
2294     assert(NCH(n) > 0);
2295     REQ(n, testlist);
2296     if (NCH(n) == 1) {
2297         expr_ty base;
2298         asdl_seq *bases = asdl_seq_new(1, c->c_arena);
2299         if (!bases)
2300             return NULL;
2301         base = ast_for_expr(c, CHILD(n, 0));
2302         if (!base)
2303             return NULL;
2304         asdl_seq_SET(bases, 0, base);
2305         return bases;
2306     }
2307 
2308     return seq_for_testlist(c, n);
2309 }
2310 
2311 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2312 ast_for_expr_stmt(struct compiling *c, const node *n)
2313 {
2314     int num;
2315     REQ(n, expr_stmt);
2316     /* expr_stmt: testlist (augassign (yield_expr|testlist)
2317                 | ('=' (yield_expr|testlist))* [TYPE_COMMENT])
2318        testlist: test (',' test)* [',']
2319        augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2320                 | '<<=' | '>>=' | '**=' | '//='
2321        test: ... here starts the operator precendence dance
2322      */
2323     num = NCH(n);
2324 
2325     if (num == 1 || (num == 2 && TYPE(CHILD(n, 1)) == TYPE_COMMENT)) {
2326         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2327         if (!e)
2328             return NULL;
2329 
2330         return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2331     }
2332     else if (TYPE(CHILD(n, 1)) == augassign) {
2333         expr_ty expr1, expr2;
2334         operator_ty newoperator;
2335         node *ch = CHILD(n, 0);
2336 
2337         expr1 = ast_for_testlist(c, ch);
2338         if (!expr1)
2339             return NULL;
2340         if(!set_context(c, expr1, Store, ch))
2341             return NULL;
2342         /* set_context checks that most expressions are not the left side.
2343           Augmented assignments can only have a name, a subscript, or an
2344           attribute on the left, though, so we have to explicitly check for
2345           those. */
2346         switch (expr1->kind) {
2347             case Name_kind:
2348             case Attribute_kind:
2349             case Subscript_kind:
2350                 break;
2351             default:
2352                 ast_error(ch, "illegal expression for augmented assignment");
2353                 return NULL;
2354         }
2355 
2356         ch = CHILD(n, 2);
2357         if (TYPE(ch) == testlist)
2358             expr2 = ast_for_testlist(c, ch);
2359         else
2360             expr2 = ast_for_expr(c, ch);
2361         if (!expr2)
2362             return NULL;
2363 
2364         newoperator = ast_for_augassign(c, CHILD(n, 1));
2365         if (!newoperator)
2366             return NULL;
2367 
2368         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2369                          c->c_arena);
2370     }
2371     else {
2372         int i, nch_minus_type, has_type_comment;
2373         asdl_seq *targets;
2374         node *value;
2375         expr_ty expression;
2376         string type_comment;
2377 
2378         /* a normal assignment */
2379         REQ(CHILD(n, 1), EQUAL);
2380 
2381         has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT;
2382         nch_minus_type = num - has_type_comment;
2383 
2384         targets = asdl_seq_new(nch_minus_type / 2, c->c_arena);
2385         if (!targets)
2386             return NULL;
2387         for (i = 0; i < nch_minus_type - 2; i += 2) {
2388             expr_ty e;
2389             node *ch = CHILD(n, i);
2390             if (TYPE(ch) == yield_expr) {
2391                 ast_error(ch, "assignment to yield expression not possible");
2392                 return NULL;
2393             }
2394             e = ast_for_testlist(c, ch);
2395             if (!e)
2396                 return NULL;
2397 
2398             /* set context to assign */
2399             if (!set_context(c, e, Store, CHILD(n, i)))
2400                 return NULL;
2401 
2402             asdl_seq_SET(targets, i / 2, e);
2403         }
2404         value = CHILD(n, nch_minus_type - 1);
2405         if (TYPE(value) == testlist)
2406             expression = ast_for_testlist(c, value);
2407         else
2408             expression = ast_for_expr(c, value);
2409         if (!expression)
2410             return NULL;
2411         if (has_type_comment)
2412             type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
2413         else
2414             type_comment = NULL;
2415         return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
2416                       c->c_arena);
2417     }
2418 }
2419 
2420 static stmt_ty
ast_for_print_stmt(struct compiling * c,const node * n)2421 ast_for_print_stmt(struct compiling *c, const node *n)
2422 {
2423     /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2424                              | '>>' test [ (',' test)+ [','] ] )
2425      */
2426     expr_ty dest = NULL, expression;
2427     asdl_seq *seq = NULL;
2428     bool nl;
2429     int i, j, values_count, start = 1;
2430 
2431     REQ(n, print_stmt);
2432     if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2433         dest = ast_for_expr(c, CHILD(n, 2));
2434         if (!dest)
2435             return NULL;
2436         start = 4;
2437     }
2438     values_count = (NCH(n) + 1 - start) / 2;
2439     if (values_count) {
2440         seq = asdl_seq_new(values_count, c->c_arena);
2441         if (!seq)
2442             return NULL;
2443         for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2444             expression = ast_for_expr(c, CHILD(n, i));
2445             if (!expression)
2446                 return NULL;
2447             asdl_seq_SET(seq, j, expression);
2448         }
2449     }
2450     nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2451     return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2452 }
2453 
2454 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)2455 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2456 {
2457     asdl_seq *seq;
2458     int i;
2459     expr_ty e;
2460 
2461     REQ(n, exprlist);
2462 
2463     seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2464     if (!seq)
2465         return NULL;
2466     for (i = 0; i < NCH(n); i += 2) {
2467         e = ast_for_expr(c, CHILD(n, i));
2468         if (!e)
2469             return NULL;
2470         asdl_seq_SET(seq, i / 2, e);
2471         if (context && !set_context(c, e, context, CHILD(n, i)))
2472             return NULL;
2473     }
2474     return seq;
2475 }
2476 
2477 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)2478 ast_for_del_stmt(struct compiling *c, const node *n)
2479 {
2480     asdl_seq *expr_list;
2481 
2482     /* del_stmt: 'del' exprlist */
2483     REQ(n, del_stmt);
2484 
2485     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2486     if (!expr_list)
2487         return NULL;
2488     return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2489 }
2490 
2491 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)2492 ast_for_flow_stmt(struct compiling *c, const node *n)
2493 {
2494     /*
2495       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2496                  | yield_stmt
2497       break_stmt: 'break'
2498       continue_stmt: 'continue'
2499       return_stmt: 'return' [testlist]
2500       yield_stmt: yield_expr
2501       yield_expr: 'yield' testlist
2502       raise_stmt: 'raise' [test [',' test [',' test]]]
2503     */
2504     node *ch;
2505 
2506     REQ(n, flow_stmt);
2507     ch = CHILD(n, 0);
2508     switch (TYPE(ch)) {
2509         case break_stmt:
2510             return Break(LINENO(n), n->n_col_offset, c->c_arena);
2511         case continue_stmt:
2512             return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2513         case yield_stmt: { /* will reduce to yield_expr */
2514             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2515             if (!exp)
2516                 return NULL;
2517             return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2518         }
2519         case return_stmt:
2520             if (NCH(ch) == 1)
2521                 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2522             else {
2523                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2524                 if (!expression)
2525                     return NULL;
2526                 return Return(expression, LINENO(n), n->n_col_offset,
2527                               c->c_arena);
2528             }
2529         case raise_stmt:
2530             if (NCH(ch) == 1)
2531                 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset,
2532                              c->c_arena);
2533             else if (NCH(ch) == 2) {
2534                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2535                 if (!expression)
2536                     return NULL;
2537                 return Raise(expression, NULL, NULL, LINENO(n),
2538                              n->n_col_offset, c->c_arena);
2539             }
2540             else if (NCH(ch) == 4) {
2541                 expr_ty expr1, expr2;
2542 
2543                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2544                 if (!expr1)
2545                     return NULL;
2546                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2547                 if (!expr2)
2548                     return NULL;
2549 
2550                 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset,
2551                              c->c_arena);
2552             }
2553             else if (NCH(ch) == 6) {
2554                 expr_ty expr1, expr2, expr3;
2555 
2556                 expr1 = ast_for_expr(c, CHILD(ch, 1));
2557                 if (!expr1)
2558                     return NULL;
2559                 expr2 = ast_for_expr(c, CHILD(ch, 3));
2560                 if (!expr2)
2561                     return NULL;
2562                 expr3 = ast_for_expr(c, CHILD(ch, 5));
2563                 if (!expr3)
2564                     return NULL;
2565 
2566                 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset,
2567                              c->c_arena);
2568             }
2569         default:
2570             PyErr_Format(PyExc_SystemError,
2571                          "unexpected flow_stmt: %d", TYPE(ch));
2572             return NULL;
2573     }
2574 }
2575 
2576 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)2577 alias_for_import_name(struct compiling *c, const node *n, int store)
2578 {
2579     /*
2580       import_as_name: NAME ['as' NAME]
2581       dotted_as_name: dotted_name ['as' NAME]
2582       dotted_name: NAME ('.' NAME)*
2583     */
2584     PyObject *str, *name;
2585 
2586  loop:
2587     switch (TYPE(n)) {
2588          case import_as_name: {
2589             node *name_node = CHILD(n, 0);
2590             str = NULL;
2591             if (NCH(n) == 3) {
2592                 node *str_node = CHILD(n, 2);
2593                 if (store && !forbidden_check(c, str_node, STR(str_node)))
2594                     return NULL;
2595                 str = NEW_IDENTIFIER(str_node);
2596                 if (!str)
2597                     return NULL;
2598             }
2599             else {
2600                 if (!forbidden_check(c, name_node, STR(name_node)))
2601                     return NULL;
2602             }
2603             name = NEW_IDENTIFIER(name_node);
2604             if (!name)
2605                 return NULL;
2606             return alias(name, str, c->c_arena);
2607         }
2608         case dotted_as_name:
2609             if (NCH(n) == 1) {
2610                 n = CHILD(n, 0);
2611                 goto loop;
2612             }
2613             else {
2614                 node *asname_node = CHILD(n, 2);
2615                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
2616                 if (!a)
2617                     return NULL;
2618                 assert(!a->asname);
2619                 if (!forbidden_check(c, asname_node, STR(asname_node)))
2620                     return NULL;
2621                 a->asname = NEW_IDENTIFIER(asname_node);
2622                 if (!a->asname)
2623                     return NULL;
2624                 return a;
2625             }
2626             break;
2627         case dotted_name:
2628             if (NCH(n) == 1) {
2629                 node *name_node = CHILD(n, 0);
2630                 if (store && !forbidden_check(c, name_node, STR(name_node)))
2631                     return NULL;
2632                 name = NEW_IDENTIFIER(name_node);
2633                 if (!name)
2634                     return NULL;
2635                 return alias(name, NULL, c->c_arena);
2636             }
2637             else {
2638                 /* Create a string of the form "a.b.c" */
2639                 int i;
2640                 size_t len;
2641                 char *s;
2642                 PyObject *uni;
2643 
2644                 len = 0;
2645                 for (i = 0; i < NCH(n); i += 2)
2646                     /* length of string plus one for the dot */
2647                     len += strlen(STR(CHILD(n, i))) + 1;
2648                 len--; /* the last name doesn't have a dot */
2649                 str = PyBytes_FromStringAndSize(NULL, len);
2650                 if (!str)
2651                     return NULL;
2652                 s = PyBytes_AS_STRING(str);
2653                 if (!s)
2654                     return NULL;
2655                 for (i = 0; i < NCH(n); i += 2) {
2656                     char *sch = STR(CHILD(n, i));
2657                     strcpy(s, STR(CHILD(n, i)));
2658                     s += strlen(sch);
2659                     *s++ = '.';
2660                 }
2661                 --s;
2662                 *s = '\0';
2663                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
2664                                            PyBytes_GET_SIZE(str),
2665                                            NULL);
2666                 Py_DECREF(str);
2667                 if (!uni)
2668                     return NULL;
2669                 str = uni;
2670                 PyUnicode_InternInPlace(&str);
2671                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
2672                     Py_DECREF(str);
2673                     return NULL;
2674                 }
2675                 return alias(str, NULL, c->c_arena);
2676             }
2677             break;
2678         case STAR:
2679             str = PyUnicode_InternFromString("*");
2680             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
2681                 Py_DECREF(str);
2682                 return NULL;
2683             }
2684             return alias(str, NULL, c->c_arena);
2685         default:
2686             PyErr_Format(PyExc_SystemError,
2687                          "unexpected import name: %d", TYPE(n));
2688             return NULL;
2689     }
2690 
2691     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2692     return NULL;
2693 }
2694 
2695 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)2696 ast_for_import_stmt(struct compiling *c, const node *n)
2697 {
2698     /*
2699       import_stmt: import_name | import_from
2700       import_name: 'import' dotted_as_names
2701       import_from: 'from' ('.'* dotted_name | '.') 'import'
2702                           ('*' | '(' import_as_names ')' | import_as_names)
2703     */
2704     int lineno;
2705     int col_offset;
2706     int i;
2707     asdl_seq *aliases;
2708 
2709     REQ(n, import_stmt);
2710     lineno = LINENO(n);
2711     col_offset = n->n_col_offset;
2712     n = CHILD(n, 0);
2713     if (TYPE(n) == import_name) {
2714         n = CHILD(n, 1);
2715         REQ(n, dotted_as_names);
2716         aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2717         if (!aliases)
2718             return NULL;
2719         for (i = 0; i < NCH(n); i += 2) {
2720             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2721             if (!import_alias)
2722                 return NULL;
2723             asdl_seq_SET(aliases, i / 2, import_alias);
2724         }
2725         return Import(aliases, lineno, col_offset, c->c_arena);
2726     }
2727     else if (TYPE(n) == import_from) {
2728         int n_children;
2729         int idx, ndots = 0;
2730         alias_ty mod = NULL;
2731         identifier modname = NULL;
2732 
2733        /* Count the number of dots (for relative imports) and check for the
2734           optional module name */
2735         for (idx = 1; idx < NCH(n); idx++) {
2736             if (TYPE(CHILD(n, idx)) == dotted_name) {
2737                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
2738                 if (!mod)
2739                     return NULL;
2740                 idx++;
2741                 break;
2742             } else if (TYPE(CHILD(n, idx)) != DOT) {
2743                 break;
2744             }
2745             ndots++;
2746         }
2747         idx++; /* skip over the 'import' keyword */
2748         switch (TYPE(CHILD(n, idx))) {
2749         case STAR:
2750             /* from ... import * */
2751             n = CHILD(n, idx);
2752             n_children = 1;
2753             break;
2754         case LPAR:
2755             /* from ... import (x, y, z) */
2756             n = CHILD(n, idx + 1);
2757             n_children = NCH(n);
2758             break;
2759         case import_as_names:
2760             /* from ... import x, y, z */
2761             n = CHILD(n, idx);
2762             n_children = NCH(n);
2763             if (n_children % 2 == 0) {
2764                 ast_error(n, "trailing comma not allowed without"
2765                              " surrounding parentheses");
2766                 return NULL;
2767             }
2768             break;
2769         default:
2770             ast_error(n, "Unexpected node-type in from-import");
2771             return NULL;
2772         }
2773 
2774         aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2775         if (!aliases)
2776             return NULL;
2777 
2778         /* handle "from ... import *" special b/c there's no children */
2779         if (TYPE(n) == STAR) {
2780             alias_ty import_alias = alias_for_import_name(c, n, 1);
2781             if (!import_alias)
2782                 return NULL;
2783                 asdl_seq_SET(aliases, 0, import_alias);
2784         }
2785         else {
2786             for (i = 0; i < NCH(n); i += 2) {
2787                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
2788                 if (!import_alias)
2789                     return NULL;
2790                     asdl_seq_SET(aliases, i / 2, import_alias);
2791             }
2792         }
2793         if (mod != NULL)
2794             modname = mod->name;
2795         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2796                           c->c_arena);
2797     }
2798     PyErr_Format(PyExc_SystemError,
2799                  "unknown import statement: starts with command '%s'",
2800                  STR(CHILD(n, 0)));
2801     return NULL;
2802 }
2803 
2804 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)2805 ast_for_global_stmt(struct compiling *c, const node *n)
2806 {
2807     /* global_stmt: 'global' NAME (',' NAME)* */
2808     identifier name;
2809     asdl_seq *s;
2810     int i;
2811 
2812     REQ(n, global_stmt);
2813     s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2814     if (!s)
2815         return NULL;
2816     for (i = 1; i < NCH(n); i += 2) {
2817         name = NEW_IDENTIFIER(CHILD(n, i));
2818         if (!name)
2819             return NULL;
2820         asdl_seq_SET(s, i / 2, name);
2821     }
2822     return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2823 }
2824 
2825 static stmt_ty
ast_for_exec_stmt(struct compiling * c,const node * n)2826 ast_for_exec_stmt(struct compiling *c, const node *n)
2827 {
2828     expr_ty expr1, globals = NULL, locals = NULL;
2829     int n_children = NCH(n);
2830     if (n_children != 2 && n_children != 4 && n_children != 6) {
2831         PyErr_Format(PyExc_SystemError,
2832                      "poorly formed 'exec' statement: %d parts to statement",
2833                      n_children);
2834         return NULL;
2835     }
2836 
2837     /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2838     REQ(n, exec_stmt);
2839     expr1 = ast_for_expr(c, CHILD(n, 1));
2840     if (!expr1)
2841         return NULL;
2842 
2843     if (expr1->kind == Tuple_kind && n_children < 4 &&
2844         (asdl_seq_LEN(expr1->v.Tuple.elts) == 2 ||
2845          asdl_seq_LEN(expr1->v.Tuple.elts) == 3)) {
2846         /* Backwards compatibility: passing exec args as a tuple */
2847         globals = asdl_seq_GET(expr1->v.Tuple.elts, 1);
2848         if (asdl_seq_LEN(expr1->v.Tuple.elts) == 3) {
2849             locals = asdl_seq_GET(expr1->v.Tuple.elts, 2);
2850         }
2851         expr1 = asdl_seq_GET(expr1->v.Tuple.elts, 0);
2852     }
2853 
2854     if (n_children >= 4) {
2855         globals = ast_for_expr(c, CHILD(n, 3));
2856         if (!globals)
2857             return NULL;
2858     }
2859     if (n_children == 6) {
2860         locals = ast_for_expr(c, CHILD(n, 5));
2861         if (!locals)
2862             return NULL;
2863     }
2864 
2865     return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset,
2866                 c->c_arena);
2867 }
2868 
2869 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)2870 ast_for_assert_stmt(struct compiling *c, const node *n)
2871 {
2872     /* assert_stmt: 'assert' test [',' test] */
2873     REQ(n, assert_stmt);
2874     if (NCH(n) == 2) {
2875         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2876         if (!expression)
2877             return NULL;
2878         return Assert(expression, NULL, LINENO(n), n->n_col_offset,
2879                       c->c_arena);
2880     }
2881     else if (NCH(n) == 4) {
2882         expr_ty expr1, expr2;
2883 
2884         expr1 = ast_for_expr(c, CHILD(n, 1));
2885         if (!expr1)
2886             return NULL;
2887         expr2 = ast_for_expr(c, CHILD(n, 3));
2888         if (!expr2)
2889             return NULL;
2890 
2891         return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2892     }
2893     PyErr_Format(PyExc_SystemError,
2894                  "improper number of parts to 'assert' statement: %d",
2895                  NCH(n));
2896     return NULL;
2897 }
2898 
2899 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)2900 ast_for_suite(struct compiling *c, const node *n)
2901 {
2902     /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
2903     asdl_seq *seq;
2904     stmt_ty s;
2905     int i, total, num, end, pos = 0;
2906     node *ch;
2907 
2908     REQ(n, suite);
2909 
2910     total = num_stmts(n);
2911     seq = asdl_seq_new(total, c->c_arena);
2912     if (!seq)
2913         return NULL;
2914     if (TYPE(CHILD(n, 0)) == simple_stmt) {
2915         n = CHILD(n, 0);
2916         /* simple_stmt always ends with a NEWLINE,
2917            and may have a trailing SEMI
2918         */
2919         end = NCH(n) - 1;
2920         if (TYPE(CHILD(n, end - 1)) == SEMI)
2921             end--;
2922         /* loop by 2 to skip semi-colons */
2923         for (i = 0; i < end; i += 2) {
2924             ch = CHILD(n, i);
2925             s = ast_for_stmt(c, ch);
2926             if (!s)
2927                 return NULL;
2928             asdl_seq_SET(seq, pos++, s);
2929         }
2930     }
2931     else {
2932         i = 2;
2933         if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
2934             i += 2;
2935 
2936         for (; i < (NCH(n) - 1); i++) {
2937             ch = CHILD(n, i);
2938             REQ(ch, stmt);
2939             num = num_stmts(ch);
2940             if (num == 1) {
2941                 /* small_stmt or compound_stmt with only one child */
2942                 s = ast_for_stmt(c, ch);
2943                 if (!s)
2944                     return NULL;
2945                 asdl_seq_SET(seq, pos++, s);
2946             }
2947             else {
2948                 int j;
2949                 ch = CHILD(ch, 0);
2950                 REQ(ch, simple_stmt);
2951                 for (j = 0; j < NCH(ch); j += 2) {
2952                     /* statement terminates with a semi-colon ';' */
2953                     if (NCH(CHILD(ch, j)) == 0) {
2954                         assert((j + 1) == NCH(ch));
2955                         break;
2956                     }
2957                     s = ast_for_stmt(c, CHILD(ch, j));
2958                     if (!s)
2959                         return NULL;
2960                     asdl_seq_SET(seq, pos++, s);
2961                 }
2962             }
2963         }
2964     }
2965     assert(pos == seq->size);
2966     return seq;
2967 }
2968 
2969 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)2970 ast_for_if_stmt(struct compiling *c, const node *n)
2971 {
2972     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2973        ['else' ':' suite]
2974     */
2975     char *s;
2976 
2977     REQ(n, if_stmt);
2978 
2979     if (NCH(n) == 4) {
2980         expr_ty expression;
2981         asdl_seq *suite_seq;
2982 
2983         expression = ast_for_expr(c, CHILD(n, 1));
2984         if (!expression)
2985             return NULL;
2986         suite_seq = ast_for_suite(c, CHILD(n, 3));
2987         if (!suite_seq)
2988             return NULL;
2989 
2990         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2991                   c->c_arena);
2992     }
2993 
2994     s = STR(CHILD(n, 4));
2995     /* s[2], the third character in the string, will be
2996        's' for el_s_e, or
2997        'i' for el_i_f
2998     */
2999     if (s[2] == 's') {
3000         expr_ty expression;
3001         asdl_seq *seq1, *seq2;
3002 
3003         expression = ast_for_expr(c, CHILD(n, 1));
3004         if (!expression)
3005             return NULL;
3006         seq1 = ast_for_suite(c, CHILD(n, 3));
3007         if (!seq1)
3008             return NULL;
3009         seq2 = ast_for_suite(c, CHILD(n, 6));
3010         if (!seq2)
3011             return NULL;
3012 
3013         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3014                   c->c_arena);
3015     }
3016     else if (s[2] == 'i') {
3017         int i, n_elif, has_else = 0;
3018         expr_ty expression;
3019         asdl_seq *suite_seq;
3020         asdl_seq *orelse = NULL;
3021         n_elif = NCH(n) - 4;
3022         /* must reference the child n_elif+1 since 'else' token is third,
3023            not fourth, child from the end. */
3024         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3025             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3026             has_else = 1;
3027             n_elif -= 3;
3028         }
3029         n_elif /= 4;
3030 
3031         if (has_else) {
3032             asdl_seq *suite_seq2;
3033 
3034             orelse = asdl_seq_new(1, c->c_arena);
3035             if (!orelse)
3036                 return NULL;
3037             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3038             if (!expression)
3039                 return NULL;
3040             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3041             if (!suite_seq)
3042                 return NULL;
3043             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3044             if (!suite_seq2)
3045                 return NULL;
3046 
3047             asdl_seq_SET(orelse, 0,
3048                          If(expression, suite_seq, suite_seq2,
3049                             LINENO(CHILD(n, NCH(n) - 6)),
3050                             CHILD(n, NCH(n) - 6)->n_col_offset,
3051                             c->c_arena));
3052             /* the just-created orelse handled the last elif */
3053             n_elif--;
3054         }
3055 
3056         for (i = 0; i < n_elif; i++) {
3057             int off = 5 + (n_elif - i - 1) * 4;
3058             asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
3059             if (!newobj)
3060                 return NULL;
3061             expression = ast_for_expr(c, CHILD(n, off));
3062             if (!expression)
3063                 return NULL;
3064             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3065             if (!suite_seq)
3066                 return NULL;
3067 
3068             asdl_seq_SET(newobj, 0,
3069                          If(expression, suite_seq, orelse,
3070                             LINENO(CHILD(n, off)),
3071                             CHILD(n, off)->n_col_offset, c->c_arena));
3072             orelse = newobj;
3073         }
3074         expression = ast_for_expr(c, CHILD(n, 1));
3075         if (!expression)
3076             return NULL;
3077         suite_seq = ast_for_suite(c, CHILD(n, 3));
3078         if (!suite_seq)
3079             return NULL;
3080         return If(expression, suite_seq, orelse,
3081                   LINENO(n), n->n_col_offset, c->c_arena);
3082     }
3083 
3084     PyErr_Format(PyExc_SystemError,
3085                  "unexpected token in 'if' statement: %s", s);
3086     return NULL;
3087 }
3088 
3089 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)3090 ast_for_while_stmt(struct compiling *c, const node *n)
3091 {
3092     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3093     REQ(n, while_stmt);
3094 
3095     if (NCH(n) == 4) {
3096         expr_ty expression;
3097         asdl_seq *suite_seq;
3098 
3099         expression = ast_for_expr(c, CHILD(n, 1));
3100         if (!expression)
3101             return NULL;
3102         suite_seq = ast_for_suite(c, CHILD(n, 3));
3103         if (!suite_seq)
3104             return NULL;
3105         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3106                      c->c_arena);
3107     }
3108     else if (NCH(n) == 7) {
3109         expr_ty expression;
3110         asdl_seq *seq1, *seq2;
3111 
3112         expression = ast_for_expr(c, CHILD(n, 1));
3113         if (!expression)
3114             return NULL;
3115         seq1 = ast_for_suite(c, CHILD(n, 3));
3116         if (!seq1)
3117             return NULL;
3118         seq2 = ast_for_suite(c, CHILD(n, 6));
3119         if (!seq2)
3120             return NULL;
3121 
3122         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3123                      c->c_arena);
3124     }
3125 
3126     PyErr_Format(PyExc_SystemError,
3127                  "wrong number of tokens for 'while' statement: %d",
3128                  NCH(n));
3129     return NULL;
3130 }
3131 
3132 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n)3133 ast_for_for_stmt(struct compiling *c, const node *n)
3134 {
3135     asdl_seq *_target, *seq = NULL, *suite_seq;
3136     expr_ty expression;
3137     expr_ty target, first;
3138     const node *node_target;
3139     int has_type_comment;
3140     string type_comment;
3141     /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
3142     REQ(n, for_stmt);
3143 
3144     has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
3145 
3146     if (NCH(n) == 9 + has_type_comment) {
3147         seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
3148         if (!seq)
3149             return NULL;
3150     }
3151 
3152     node_target = CHILD(n, 1);
3153     _target = ast_for_exprlist(c, node_target, Store);
3154     if (!_target)
3155         return NULL;
3156     /* Check the # of children rather than the length of _target, since
3157        for x, in ... has 1 element in _target, but still requires a Tuple. */
3158     first = (expr_ty)asdl_seq_GET(_target, 0);
3159     if (NCH(node_target) == 1)
3160         target = first;
3161     else
3162         target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
3163 
3164     expression = ast_for_testlist(c, CHILD(n, 3));
3165     if (!expression)
3166         return NULL;
3167     suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
3168     if (!suite_seq)
3169         return NULL;
3170 
3171     if (has_type_comment)
3172         type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
3173     else
3174         type_comment = NULL;
3175 
3176     return For(target, expression, suite_seq, seq, type_comment, LINENO(n), n->n_col_offset,
3177                c->c_arena);
3178 }
3179 
3180 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3181 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3182 {
3183     /* except_clause: 'except' [test [(',' | 'as') test]] */
3184     REQ(exc, except_clause);
3185     REQ(body, suite);
3186 
3187     if (NCH(exc) == 1) {
3188         asdl_seq *suite_seq = ast_for_suite(c, body);
3189         if (!suite_seq)
3190             return NULL;
3191 
3192         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3193                              exc->n_col_offset, c->c_arena);
3194     }
3195     else if (NCH(exc) == 2) {
3196         expr_ty expression;
3197         asdl_seq *suite_seq;
3198 
3199         expression = ast_for_expr(c, CHILD(exc, 1));
3200         if (!expression)
3201             return NULL;
3202         suite_seq = ast_for_suite(c, body);
3203         if (!suite_seq)
3204             return NULL;
3205 
3206         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3207                              exc->n_col_offset, c->c_arena);
3208     }
3209     else if (NCH(exc) == 4) {
3210         asdl_seq *suite_seq;
3211         expr_ty expression;
3212         expr_ty e = ast_for_expr(c, CHILD(exc, 3));
3213         if (!e)
3214             return NULL;
3215         if (!set_context(c, e, Store, CHILD(exc, 3)))
3216             return NULL;
3217         expression = ast_for_expr(c, CHILD(exc, 1));
3218         if (!expression)
3219             return NULL;
3220         suite_seq = ast_for_suite(c, body);
3221         if (!suite_seq)
3222             return NULL;
3223 
3224         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3225                              exc->n_col_offset, c->c_arena);
3226     }
3227 
3228     PyErr_Format(PyExc_SystemError,
3229                  "wrong number of children for 'except' clause: %d",
3230                  NCH(exc));
3231     return NULL;
3232 }
3233 
3234 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3235 ast_for_try_stmt(struct compiling *c, const node *n)
3236 {
3237     const int nch = NCH(n);
3238     int n_except = (nch - 3)/3;
3239     asdl_seq *body, *orelse = NULL, *finally = NULL;
3240 
3241     REQ(n, try_stmt);
3242 
3243     body = ast_for_suite(c, CHILD(n, 2));
3244     if (body == NULL)
3245         return NULL;
3246 
3247     if (TYPE(CHILD(n, nch - 3)) == NAME) {
3248         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3249             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3250                 /* we can assume it's an "else",
3251                    because nch >= 9 for try-else-finally and
3252                    it would otherwise have a type of except_clause */
3253                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3254                 if (orelse == NULL)
3255                     return NULL;
3256                 n_except--;
3257             }
3258 
3259             finally = ast_for_suite(c, CHILD(n, nch - 1));
3260             if (finally == NULL)
3261                 return NULL;
3262             n_except--;
3263         }
3264         else {
3265             /* we can assume it's an "else",
3266                otherwise it would have a type of except_clause */
3267             orelse = ast_for_suite(c, CHILD(n, nch - 1));
3268             if (orelse == NULL)
3269                 return NULL;
3270             n_except--;
3271         }
3272     }
3273     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3274         ast_error(n, "malformed 'try' statement");
3275         return NULL;
3276     }
3277 
3278     if (n_except > 0) {
3279         int i;
3280         stmt_ty except_st;
3281         /* process except statements to create a try ... except */
3282         asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
3283         if (handlers == NULL)
3284             return NULL;
3285 
3286         for (i = 0; i < n_except; i++) {
3287             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3288                                                        CHILD(n, 5 + i * 3));
3289             if (!e)
3290                 return NULL;
3291             asdl_seq_SET(handlers, i, e);
3292         }
3293 
3294         except_st = TryExcept(body, handlers, orelse, LINENO(n),
3295                               n->n_col_offset, c->c_arena);
3296         if (!finally)
3297             return except_st;
3298 
3299         /* if a 'finally' is present too, we nest the TryExcept within a
3300            TryFinally to emulate try ... except ... finally */
3301         body = asdl_seq_new(1, c->c_arena);
3302         if (body == NULL)
3303             return NULL;
3304         asdl_seq_SET(body, 0, except_st);
3305     }
3306 
3307     /* must be a try ... finally (except clauses are in body, if any exist) */
3308     assert(finally != NULL);
3309     return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
3310 }
3311 
3312 /* with_item: test ['as' expr] */
3313 static stmt_ty
ast_for_with_item(struct compiling * c,const node * n,asdl_seq * content,string type_comment)3314 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content, string type_comment)
3315 {
3316     expr_ty context_expr, optional_vars = NULL;
3317 
3318     REQ(n, with_item);
3319     context_expr = ast_for_expr(c, CHILD(n, 0));
3320     if (!context_expr)
3321         return NULL;
3322     if (NCH(n) == 3) {
3323         optional_vars = ast_for_expr(c, CHILD(n, 2));
3324 
3325         if (!optional_vars) {
3326             return NULL;
3327         }
3328         if (!set_context(c, optional_vars, Store, n)) {
3329             return NULL;
3330         }
3331     }
3332 
3333     return With(context_expr, optional_vars, content, type_comment, LINENO(n),
3334                 n->n_col_offset, c->c_arena);
3335 }
3336 
3337 /* with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite */
3338 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n)3339 ast_for_with_stmt(struct compiling *c, const node *n)
3340 {
3341     int i, has_type_comment;
3342     stmt_ty ret;
3343     asdl_seq *inner;
3344     string type_comment;
3345 
3346     REQ(n, with_stmt);
3347 
3348     has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
3349 
3350     /* process the with items inside-out */
3351     i = NCH(n) - 1;
3352     /* the suite of the innermost with item is the suite of the with stmt */
3353     inner = ast_for_suite(c, CHILD(n, i));
3354     if (!inner)
3355         return NULL;
3356 
3357     if (has_type_comment) {
3358         type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
3359         i--;
3360     } else
3361         type_comment = NULL;
3362 
3363 
3364     for (;;) {
3365         i -= 2;
3366         ret = ast_for_with_item(c, CHILD(n, i), inner, type_comment);
3367         if (!ret)
3368             return NULL;
3369         /* was this the last item? */
3370         if (i == 1)
3371             break;
3372         /* if not, wrap the result so far in a new sequence */
3373         inner = asdl_seq_new(1, c->c_arena);
3374         if (!inner)
3375             return NULL;
3376         asdl_seq_SET(inner, 0, ret);
3377     }
3378 
3379     return ret;
3380 }
3381 
3382 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3383 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3384 {
3385     /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
3386     PyObject *classname;
3387     asdl_seq *bases, *s;
3388 
3389     REQ(n, classdef);
3390 
3391     if (!forbidden_check(c, n, STR(CHILD(n, 1))))
3392             return NULL;
3393 
3394     if (NCH(n) == 4) {
3395         s = ast_for_suite(c, CHILD(n, 3));
3396         if (!s)
3397             return NULL;
3398         classname = NEW_IDENTIFIER(CHILD(n, 1));
3399         if (!classname)
3400             return NULL;
3401         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3402                         n->n_col_offset, c->c_arena);
3403     }
3404     /* check for empty base list */
3405     if (TYPE(CHILD(n,3)) == RPAR) {
3406         s = ast_for_suite(c, CHILD(n,5));
3407         if (!s)
3408             return NULL;
3409         classname = NEW_IDENTIFIER(CHILD(n, 1));
3410         if (!classname)
3411             return NULL;
3412         return ClassDef(classname, NULL, s, decorator_seq, LINENO(n),
3413                         n->n_col_offset, c->c_arena);
3414     }
3415 
3416     /* else handle the base class list */
3417     bases = ast_for_class_bases(c, CHILD(n, 3));
3418     if (!bases)
3419         return NULL;
3420 
3421     s = ast_for_suite(c, CHILD(n, 6));
3422     if (!s)
3423         return NULL;
3424     classname = NEW_IDENTIFIER(CHILD(n, 1));
3425     if (!classname)
3426         return NULL;
3427     return ClassDef(classname, bases, s, decorator_seq,
3428                     LINENO(n), n->n_col_offset, c->c_arena);
3429 }
3430 
3431 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3432 ast_for_stmt(struct compiling *c, const node *n)
3433 {
3434     if (TYPE(n) == stmt) {
3435         assert(NCH(n) == 1);
3436         n = CHILD(n, 0);
3437     }
3438     if (TYPE(n) == simple_stmt) {
3439         assert(num_stmts(n) == 1);
3440         n = CHILD(n, 0);
3441     }
3442     if (TYPE(n) == small_stmt) {
3443         n = CHILD(n, 0);
3444         /* small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt
3445                      | flow_stmt | import_stmt | global_stmt | exec_stmt
3446                      | assert_stmt
3447         */
3448         switch (TYPE(n)) {
3449             case expr_stmt:
3450                 return ast_for_expr_stmt(c, n);
3451             case print_stmt:
3452                 return ast_for_print_stmt(c, n);
3453             case del_stmt:
3454                 return ast_for_del_stmt(c, n);
3455             case pass_stmt:
3456                 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3457             case flow_stmt:
3458                 return ast_for_flow_stmt(c, n);
3459             case import_stmt:
3460                 return ast_for_import_stmt(c, n);
3461             case global_stmt:
3462                 return ast_for_global_stmt(c, n);
3463             case exec_stmt:
3464                 return ast_for_exec_stmt(c, n);
3465             case assert_stmt:
3466                 return ast_for_assert_stmt(c, n);
3467             default:
3468                 PyErr_Format(PyExc_SystemError,
3469                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3470                              TYPE(n), NCH(n));
3471                 return NULL;
3472         }
3473     }
3474     else {
3475         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3476                         | funcdef | classdef | decorated
3477         */
3478         node *ch = CHILD(n, 0);
3479         REQ(n, compound_stmt);
3480         switch (TYPE(ch)) {
3481             case if_stmt:
3482                 return ast_for_if_stmt(c, ch);
3483             case while_stmt:
3484                 return ast_for_while_stmt(c, ch);
3485             case for_stmt:
3486                 return ast_for_for_stmt(c, ch);
3487             case try_stmt:
3488                 return ast_for_try_stmt(c, ch);
3489             case with_stmt:
3490                 return ast_for_with_stmt(c, ch);
3491             case funcdef:
3492                 return ast_for_funcdef(c, ch, NULL);
3493             case classdef:
3494                 return ast_for_classdef(c, ch, NULL);
3495             case decorated:
3496                 return ast_for_decorated(c, ch);
3497             default:
3498                 PyErr_Format(PyExc_SystemError,
3499                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
3500                              TYPE(n), NCH(n));
3501                 return NULL;
3502         }
3503     }
3504 }
3505 
3506 static PyObject *
parsenumber(struct compiling * c,const char * s)3507 parsenumber(struct compiling *c, const char *s)
3508 {
3509         const char *end;
3510         long x;
3511         double dx;
3512         int old_style_octal;
3513 #ifndef WITHOUT_COMPLEX
3514         Py_complex complex;
3515         int imflag;
3516 #endif
3517 
3518         assert(s != NULL);
3519         errno = 0;
3520         end = s + strlen(s) - 1;
3521 #ifndef WITHOUT_COMPLEX
3522         imflag = *end == 'j' || *end == 'J';
3523 #endif
3524         if (*end == 'l' || *end == 'L') {
3525                 /* Make a copy without the trailing 'L' */
3526                 size_t len = end - s  + 1;
3527                 char *copy = malloc(len);
3528                 PyObject *result;
3529                 if (copy == NULL)
3530                         return PyErr_NoMemory();
3531                 memcpy(copy, s, len);
3532                 copy[len - 1] = '\0';
3533                 old_style_octal = len > 2 && copy[0] == '0' && copy[1] >= '0' && copy[1] <= '9';
3534                 result = PyLong_FromString(copy, (char **)0, old_style_octal ? 8 : 0);
3535                 free(copy);
3536                 return result;
3537         }
3538         x = Ta27OS_strtol((char *)s, (char **)&end, 0);
3539         if (*end == '\0') {
3540                 if (errno != 0) {
3541                         old_style_octal = end - s > 1 && s[0] == '0' && s[1] >= '0' && s[1] <= '9';
3542                         return PyLong_FromString((char *)s, (char **)0, old_style_octal ? 8 : 0);
3543                 }
3544                 return PyLong_FromLong(x);
3545         }
3546         /* XXX Huge floats may silently fail */
3547 #ifndef WITHOUT_COMPLEX
3548         if (imflag) {
3549                 complex.real = 0.;
3550                 complex.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3551                 if (complex.imag == -1.0 && PyErr_Occurred())
3552                         return NULL;
3553                 return PyComplex_FromCComplex(complex);
3554         }
3555         else
3556 #endif
3557         {
3558                 dx = PyOS_string_to_double(s, NULL, NULL);
3559                 if (dx == -1.0 && PyErr_Occurred())
3560                         return NULL;
3561                 return PyFloat_FromDouble(dx);
3562         }
3563 }
3564 
3565 /* adapted from Python 3.5.1 */
3566 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)3567 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
3568 {
3569 #ifndef Py_USING_UNICODE
3570         Py_FatalError("decode_utf8 should not be called in this build.");
3571         return NULL;
3572 #else
3573     const char *s, *t;
3574     t = s = *sPtr;
3575     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3576     while (s < end && (*s & 0x80)) s++;
3577     *sPtr = s;
3578     return PyUnicode_DecodeUTF8(t, s - t, NULL);
3579 #endif
3580 }
3581 
#ifdef Py_USING_UNICODE
/* taken from Python 3.5.1
 *
 * Decode the len bytes at s, the body of a unicode literal, into a unicode
 * object by running them through the (raw) unicode-escape decoder.
 *
 * When encoding is NULL the bytes are passed to the escape decoder as they
 * are.  Otherwise the bytes are first rewritten into a scratch buffer: every
 * run of non-ASCII bytes is decoded as UTF-8 and each resulting code point is
 * emitted as a "\UXXXXXXXX" escape, and a backslash that directly precedes a
 * non-ASCII byte is emitted as "\u005c".  ASCII bytes are copied unchanged.
 *
 * rawmode selects PyUnicode_DecodeRawUnicodeEscape over
 * PyUnicode_DecodeUnicodeEscape.
 *
 * Returns the decoded object, or NULL with an exception set on failure.
 */
static PyObject *
decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
{
    PyObject *v, *u;
    char *buf;
    char *p;
    const char *end;

    if (encoding == NULL) {
        u = NULL;
    } else {
        /* Check for integer overflow of the scratch buffer size.  An
           exception must be set here: returning a bare NULL would make the
           caller report a missing-exception SystemError instead. */
        if (len > PY_SIZE_MAX / 6)
            return PyErr_NoMemory();
        /* A 2-byte UTF-8 character may become a 10-byte "\UXXXXXXXX" escape
           (1:5); a backslash followed by such a character (3 bytes) may
           become "\u005c\UXXXXXXXX" (16 bytes), or ~1:6. */
        u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
        if (u == NULL)
            return NULL;
        p = buf = PyBytes_AsString(u);
        end = s + len;
        while (s < end) {
            if (*s == '\\') {
                *p++ = *s++;
                if (*s & 0x80) {
                    /* Turn the copied backslash into "\u005c". */
                    strcpy(p, "u005c");
                    p += 5;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                int kind;
                void *data;
                Py_ssize_t nchars, i;
                w = decode_utf8(c, &s, end);
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                kind = PyUnicode_KIND(w);
                data = PyUnicode_DATA(w);
                nchars = PyUnicode_GET_LENGTH(w);
                for (i = 0; i < nchars; i++) {
                    Py_UCS4 chr = PyUnicode_READ(kind, data, i);
                    /* Each escape is exactly 10 characters long. */
                    sprintf(p, "\\U%08x", (unsigned int)chr);
                    p += 10;
                }
                /* Should be impossible to overflow */
                assert(p - buf <= Py_SIZE(u));
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
        /* From here on decode the rewritten buffer instead of the input. */
        len = p - buf;
        s = buf;
    }
    if (rawmode)
        v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
    else
        v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    Py_XDECREF(u);
    return v;
}
#endif
3649 
3650 /* s is a Python string literal, including the bracketing quote characters,
3651  * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3652  * parsestr parses it, and returns the decoded Python string object.
3653  */
3654 static PyObject *
parsestr(struct compiling * c,const node * n,const char * s)3655 parsestr(struct compiling *c, const node *n, const char *s)
3656 {
3657         size_t len, i;
3658         int quote = Py_CHARMASK(*s);
3659         int rawmode = 0;
3660         int need_encoding;
3661         int unicode = c->c_future_unicode;
3662         int bytes = 0;
3663 
3664         if (isalpha(quote) || quote == '_') {
3665                 if (quote == 'u' || quote == 'U') {
3666                         quote = *++s;
3667                         unicode = 1;
3668                 }
3669                 if (quote == 'b' || quote == 'B') {
3670                         quote = *++s;
3671                         unicode = 0;
3672                         bytes = 1;
3673                 }
3674                 if (quote == 'r' || quote == 'R') {
3675                         quote = *++s;
3676                         rawmode = 1;
3677                 }
3678         }
3679         if (quote != '\'' && quote != '\"') {
3680                 PyErr_BadInternalCall();
3681                 return NULL;
3682         }
3683         s++;
3684         len = strlen(s);
3685         if (len > INT_MAX) {
3686                 PyErr_SetString(PyExc_OverflowError,
3687                                 "string to parse is too long");
3688                 return NULL;
3689         }
3690         if (s[--len] != quote) {
3691                 PyErr_BadInternalCall();
3692                 return NULL;
3693         }
3694         if (len >= 4 && s[0] == quote && s[1] == quote) {
3695                 s += 2;
3696                 len -= 2;
3697                 if (s[--len] != quote || s[--len] != quote) {
3698                         PyErr_BadInternalCall();
3699                         return NULL;
3700                 }
3701         }
3702         if (Py_Py3kWarningFlag && bytes) {
3703             for (i = 0; i < len; i++) {
3704                 if ((unsigned char)s[i] > 127) {
3705                     if (!ast_warn(c, n,
3706                         "non-ascii bytes literals not supported in 3.x"))
3707                         return NULL;
3708                     break;
3709                 }
3710             }
3711         }
3712 #ifdef Py_USING_UNICODE
3713         if (unicode || Py_UnicodeFlag) {
3714                 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3715         }
3716 #endif
3717         need_encoding = (c->c_encoding != NULL &&
3718                          strcmp(c->c_encoding, "utf-8") != 0 &&
3719                          strcmp(c->c_encoding, "iso-8859-1") != 0);
3720         if (rawmode || strchr(s, '\\') == NULL) {
3721                 if (need_encoding) {
3722 #ifndef Py_USING_UNICODE
3723                         /* This should not happen - we never see any other
3724                            encoding. */
3725                         Py_FatalError(
3726                             "cannot deal with encodings in this build.");
3727 #else
3728                         PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3729                         if (u == NULL)
3730                                 return NULL;
3731                         v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3732                         Py_DECREF(u);
3733                         return v;
3734 #endif
3735                 } else {
3736                   return PyBytes_FromStringAndSize(s, len);
3737                 }
3738         }
3739 
3740         return PyBytes_DecodeEscape(s, len, NULL, 1,
3741                                     need_encoding ? c->c_encoding : NULL);
3742 }
3743 
3744 /* Build a Python string object out of a STRING atom.  This takes care of
3745  * compile-time literal catenation, calling parsestr() on each piece, and
3746  * pasting the intermediate results together.
3747  */
3748 static PyObject *
parsestrplus(struct compiling * c,const node * n)3749 parsestrplus(struct compiling *c, const node *n)
3750 {
3751         PyObject *v;
3752         int i;
3753         REQ(CHILD(n, 0), STRING);
3754         if ((v = parsestr(c, n, STR(CHILD(n, 0)))) != NULL) {
3755                 /* String literal concatenation */
3756                 for (i = 1; i < NCH(n); i++) {
3757                         PyObject *s;
3758                         s = parsestr(c, n, STR(CHILD(n, i)));
3759                         if (s == NULL)
3760                                 goto onError;
3761                         if (PyBytes_Check(v) && PyBytes_Check(s)) {
3762                                 PyBytes_ConcatAndDel(&v, s);
3763                                 if (v == NULL)
3764                                     goto onError;
3765                         }
3766 #ifdef Py_USING_UNICODE
3767                         else {
3768                                 PyObject *temp;
3769                                 /* Python 2's PyUnicode_FromObject (which is
3770                                  * called on the arguments to PyUnicode_Concat)
3771                                  * automatically converts Bytes objects into
3772                                  * Str objects, but in Python 3 it throws a
3773                                  * syntax error.  To allow mixed literal
3774                                  * concatenation e.g. "foo" u"bar" (which is
3775                                  * valid in Python 2), we have to explicitly
3776                                  * check for Bytes and convert manually. */
3777                                 if (PyBytes_Check(s)) {
3778                                     temp = PyUnicode_FromEncodedObject(s, NULL, "strict");
3779                                     Py_DECREF(s);
3780                                     s = temp;
3781                                 }
3782 
3783                                 if (PyBytes_Check(v)) {
3784                                     temp = PyUnicode_FromEncodedObject(v, NULL, "strict");
3785                                     Py_DECREF(v);
3786                                     v = temp;
3787                                 }
3788 
3789                                 temp = PyUnicode_Concat(v, s);
3790                                 Py_DECREF(s);
3791                                 Py_DECREF(v);
3792                                 v = temp;
3793                                 if (v == NULL)
3794                                     goto onError;
3795                         }
3796 #endif
3797                 }
3798         }
3799         return v;
3800 
3801  onError:
3802         Py_XDECREF(v);
3803         return NULL;
3804 }
3805