1 /*  parsermodule.c
2  *
3  *  Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4  *  Institute and State University, Blacksburg, Virginia, USA.
5  *  Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6  *  Amsterdam, The Netherlands.  Copying is permitted under the terms
7  *  associated with the main Python distribution, with the additional
8  *  restriction that this additional notice be included and maintained
9  *  on all distributed copies.
10  *
11  *  This module serves to replace the original parser module written
12  *  by Guido.  The functionality is not matched precisely, but the
13  *  original may be implemented on top of this.  This is desirable
14  *  since the source of the text to be parsed is now divorced from
15  *  this interface.
16  *
17  *  Unlike the prior interface, the ability to give a parse tree
18  *  produced by Python code as a tuple to the compiler is enabled by
19  *  this module.  See the documentation for more details.
20  *
21  *  I've added some annotations that help with the lint code-checking
22  *  program, but they're not complete by a long shot.  The real errors
23  *  that lint detects are gone, but there are still warnings with
24  *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
25  *  look like "NOTE(...)".
26  *
27  */
28 
29 #include "Python.h"                     /* general Python API             */
30 #include "Python-ast.h"                 /* mod_ty */
31 #undef Yield   /* undefine macro conflicting with <winbase.h> */
32 #include "ast.h"
33 #include "graminit.h"                   /* symbols defined in the grammar */
34 #include "node.h"                       /* internal parser structure      */
35 #include "errcode.h"                    /* error codes for PyNode_*()     */
36 #include "token.h"                      /* token definitions              */
37                                         /* ISTERMINAL() / ISNONTERMINAL() */
38 #include "grammar.h"
39 #include "parsetok.h"
40 
41 extern grammar _PyParser_Grammar; /* From graminit.c */
42 
43 #ifdef lint
44 #include <note.h>
45 #else
46 #define NOTE(x)
47 #endif
48 
49 /*  String constants used to initialize module attributes.
50  *
51  */
52 static const char parser_copyright_string[] =
53 "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
54 University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
55 Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\n\
56 Centrum, Amsterdam, The Netherlands.";
57 
58 
59 PyDoc_STRVAR(parser_doc_string,
60 "This is an interface to Python's internal parser.");
61 
62 static const char parser_version_string[] = "0.5";
63 
64 
65 typedef PyObject* (*SeqMaker) (Py_ssize_t length);
66 typedef int (*SeqInserter) (PyObject* sequence,
67                             Py_ssize_t index,
68                             PyObject* element);
69 
70 /*  The function below is copyrighted by Stichting Mathematisch Centrum.  The
71  *  original copyright statement is included below, and continues to apply
72  *  in full to the function immediately following.  All other material is
73  *  original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
74  *  Institute and State University.  Changes were made to comply with the
75  *  new naming conventions.  Added arguments to provide support for creating
76  *  lists as well as tuples, and optionally including the line numbers.
77  */
78 
79 
80 static PyObject*
node2tuple(node * n,SeqMaker mkseq,SeqInserter addelem,int lineno,int col_offset)81 node2tuple(node *n,                     /* node to convert               */
82            SeqMaker mkseq,              /* create sequence               */
83            SeqInserter addelem,         /* func. to add elem. in seq.    */
84            int lineno,                  /* include line numbers?         */
85            int col_offset)              /* include column offsets?       */
86 {
87     PyObject *result = NULL, *w;
88 
89     if (n == NULL) {
90         Py_RETURN_NONE;
91     }
92 
93     if (ISNONTERMINAL(TYPE(n))) {
94         int i;
95 
96         result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
97         if (result == NULL)
98             goto error;
99 
100         w = PyLong_FromLong(TYPE(n));
101         if (w == NULL)
102             goto error;
103         (void) addelem(result, 0, w);
104 
105         for (i = 0; i < NCH(n); i++) {
106             w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
107             if (w == NULL)
108                 goto error;
109             (void) addelem(result, i+1, w);
110         }
111 
112         if (TYPE(n) == encoding_decl) {
113             w = PyUnicode_FromString(STR(n));
114             if (w == NULL)
115                 goto error;
116             (void) addelem(result, i+1, w);
117         }
118     }
119     else if (ISTERMINAL(TYPE(n))) {
120         result = mkseq(2 + lineno + col_offset);
121         if (result == NULL)
122             goto error;
123 
124         w = PyLong_FromLong(TYPE(n));
125         if (w == NULL)
126             goto error;
127         (void) addelem(result, 0, w);
128 
129         w = PyUnicode_FromString(STR(n));
130         if (w == NULL)
131             goto error;
132         (void) addelem(result, 1, w);
133 
134         if (lineno) {
135             w = PyLong_FromLong(n->n_lineno);
136             if (w == NULL)
137                 goto error;
138             (void) addelem(result, 2, w);
139         }
140 
141         if (col_offset) {
142             w = PyLong_FromLong(n->n_col_offset);
143             if (w == NULL)
144                 goto error;
145             (void) addelem(result, 2 + lineno, w);
146         }
147     }
148     else {
149         PyErr_SetString(PyExc_SystemError,
150                         "unrecognized parse tree node type");
151         return ((PyObject*) NULL);
152     }
153     return result;
154 
155 error:
156     Py_XDECREF(result);
157     return NULL;
158 }
159 /*
160  *  End of material copyrighted by Stichting Mathematisch Centrum.
161  */
162 
163 
164 
165 /*  There are two types of intermediate objects we're interested in:
166  *  'eval' and 'exec' types.  These constants can be used in the st_type
167  *  field of the object type to identify which any given object represents.
168  *  These should probably go in an external header to allow other extensions
169  *  to use them, but then, we really should be using C++ too.  ;-)
170  */
171 
172 #define PyST_EXPR  1
173 #define PyST_SUITE 2
174 
175 
176 /*  These are the internal objects and definitions required to implement the
177  *  ST type.  Most of the internal names are more reminiscent of the 'old'
178  *  naming style, but the code uses the new naming convention.
179  */
180 
181 static PyObject*
182 parser_error = 0;
183 
184 
185 typedef struct {
186     PyObject_HEAD                       /* standard object header           */
187     node* st_node;                      /* the node* returned by the parser */
188     int   st_type;                      /* EXPR or SUITE ?                  */
189     PyCompilerFlags st_flags;           /* Parser and compiler flags        */
190 } PyST_Object;
191 
192 
193 static void parser_free(PyST_Object *st);
194 static PyObject* parser_sizeof(PyST_Object *, void *);
195 static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
196 static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
197 static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
198 static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
199 static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
200 static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
201 
202 #define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
203 
204 static PyMethodDef parser_methods[] = {
205     {"compile",         (PyCFunction)(void(*)(void))parser_compilest,  PUBLIC_METHOD_TYPE,
206         PyDoc_STR("Compile this ST object into a code object.")},
207     {"isexpr",          (PyCFunction)(void(*)(void))parser_isexpr,     PUBLIC_METHOD_TYPE,
208         PyDoc_STR("Determines if this ST object was created from an expression.")},
209     {"issuite",         (PyCFunction)(void(*)(void))parser_issuite,    PUBLIC_METHOD_TYPE,
210         PyDoc_STR("Determines if this ST object was created from a suite.")},
211     {"tolist",          (PyCFunction)(void(*)(void))parser_st2list,    PUBLIC_METHOD_TYPE,
212         PyDoc_STR("Creates a list-tree representation of this ST.")},
213     {"totuple",         (PyCFunction)(void(*)(void))parser_st2tuple,   PUBLIC_METHOD_TYPE,
214         PyDoc_STR("Creates a tuple-tree representation of this ST.")},
215     {"__sizeof__",      (PyCFunction)parser_sizeof,     METH_NOARGS,
216         PyDoc_STR("Returns size in memory, in bytes.")},
217     {NULL, NULL, 0, NULL}
218 };
219 
220 static
221 PyTypeObject PyST_Type = {
222     PyVarObject_HEAD_INIT(NULL, 0)
223     "parser.st",                        /* tp_name              */
224     (int) sizeof(PyST_Object),          /* tp_basicsize         */
225     0,                                  /* tp_itemsize          */
226     (destructor)parser_free,            /* tp_dealloc           */
227     0,                                  /* tp_vectorcall_offset */
228     0,                                  /* tp_getattr           */
229     0,                                  /* tp_setattr           */
230     0,                                  /* tp_as_async          */
231     0,                                  /* tp_repr              */
232     0,                                  /* tp_as_number         */
233     0,                                  /* tp_as_sequence       */
234     0,                                  /* tp_as_mapping        */
235     0,                                  /* tp_hash              */
236     0,                                  /* tp_call              */
237     0,                                  /* tp_str               */
238     0,                                  /* tp_getattro          */
239     0,                                  /* tp_setattro          */
240 
241     /* Functions to access object as input/output buffer */
242     0,                                  /* tp_as_buffer         */
243 
244     Py_TPFLAGS_DEFAULT,                 /* tp_flags             */
245 
246     /* __doc__ */
247     "Intermediate representation of a Python parse tree.",
248     0,                                  /* tp_traverse */
249     0,                                  /* tp_clear */
250     parser_richcompare,                 /* tp_richcompare */
251     0,                                  /* tp_weaklistoffset */
252     0,                                  /* tp_iter */
253     0,                                  /* tp_iternext */
254     parser_methods,                     /* tp_methods */
255 };  /* PyST_Type */
256 
257 
258 /* PyST_Type isn't subclassable, so just check ob_type */
259 #define PyST_Object_Check(v) ((v)->ob_type == &PyST_Type)
260 
261 static int
parser_compare_nodes(node * left,node * right)262 parser_compare_nodes(node *left, node *right)
263 {
264     int j;
265 
266     if (TYPE(left) < TYPE(right))
267         return (-1);
268 
269     if (TYPE(right) < TYPE(left))
270         return (1);
271 
272     if (ISTERMINAL(TYPE(left)))
273         return (strcmp(STR(left), STR(right)));
274 
275     if (NCH(left) < NCH(right))
276         return (-1);
277 
278     if (NCH(right) < NCH(left))
279         return (1);
280 
281     for (j = 0; j < NCH(left); ++j) {
282         int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
283 
284         if (v != 0)
285             return (v);
286     }
287     return (0);
288 }
289 
290 /*  parser_richcompare(PyObject* left, PyObject* right, int op)
291  *
292  *  Comparison function used by the Python operators ==, !=, <, >, <=, >=
293  *  This really just wraps a call to parser_compare_nodes() with some easy
294  *  checks and protection code.
295  *
296  */
297 
298 static PyObject *
parser_richcompare(PyObject * left,PyObject * right,int op)299 parser_richcompare(PyObject *left, PyObject *right, int op)
300 {
301     int result;
302 
303     /* neither argument should be NULL, unless something's gone wrong */
304     if (left == NULL || right == NULL) {
305         PyErr_BadInternalCall();
306         return NULL;
307     }
308 
309     /* both arguments should be instances of PyST_Object */
310     if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) {
311         Py_RETURN_NOTIMPLEMENTED;
312     }
313 
314     if (left == right)
315         /* if arguments are identical, they're equal */
316         result = 0;
317     else
318         result = parser_compare_nodes(((PyST_Object *)left)->st_node,
319                                       ((PyST_Object *)right)->st_node);
320 
321     Py_RETURN_RICHCOMPARE(result, 0, op);
322 }
323 
324 /*  parser_newstobject(node* st)
325  *
326  *  Allocates a new Python object representing an ST.  This is simply the
327  *  'wrapper' object that holds a node* and allows it to be passed around in
328  *  Python code.
329  *
330  */
331 static PyObject*
parser_newstobject(node * st,int type)332 parser_newstobject(node *st, int type)
333 {
334     PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
335 
336     if (o != 0) {
337         o->st_node = st;
338         o->st_type = type;
339         o->st_flags = _PyCompilerFlags_INIT;
340     }
341     else {
342         PyNode_Free(st);
343     }
344     return ((PyObject*)o);
345 }
346 
347 
348 /*  void parser_free(PyST_Object* st)
349  *
350  *  This is called by a del statement that reduces the reference count to 0.
351  *
352  */
353 static void
parser_free(PyST_Object * st)354 parser_free(PyST_Object *st)
355 {
356     PyNode_Free(st->st_node);
357     PyObject_Del(st);
358 }
359 
360 static PyObject *
parser_sizeof(PyST_Object * st,void * unused)361 parser_sizeof(PyST_Object *st, void *unused)
362 {
363     Py_ssize_t res;
364 
365     res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
366     return PyLong_FromSsize_t(res);
367 }
368 
369 
370 /*  parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
371  *
372  *  This provides conversion from a node* to a tuple object that can be
373  *  returned to the Python-level caller.  The ST object is not modified.
374  *
375  */
376 static PyObject*
parser_st2tuple(PyST_Object * self,PyObject * args,PyObject * kw)377 parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
378 {
379     int line_info = 0;
380     int col_info = 0;
381     PyObject *res = 0;
382     int ok;
383 
384     static char *keywords[] = {"st", "line_info", "col_info", NULL};
385 
386     if (self == NULL || PyModule_Check(self)) {
387         ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
388                                          &PyST_Type, &self, &line_info,
389                                          &col_info);
390     }
391     else
392         ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
393                                          &line_info, &col_info);
394     if (ok != 0) {
395         /*
396          *  Convert ST into a tuple representation.  Use Guido's function,
397          *  since it's known to work already.
398          */
399         res = node2tuple(((PyST_Object*)self)->st_node,
400                          PyTuple_New, PyTuple_SetItem, line_info, col_info);
401     }
402     return (res);
403 }
404 
405 
406 /*  parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
407  *
408  *  This provides conversion from a node* to a list object that can be
409  *  returned to the Python-level caller.  The ST object is not modified.
410  *
411  */
412 static PyObject*
parser_st2list(PyST_Object * self,PyObject * args,PyObject * kw)413 parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
414 {
415     int line_info = 0;
416     int col_info = 0;
417     PyObject *res = 0;
418     int ok;
419 
420     static char *keywords[] = {"st", "line_info", "col_info", NULL};
421 
422     if (self == NULL || PyModule_Check(self))
423         ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
424                                          &PyST_Type, &self, &line_info,
425                                          &col_info);
426     else
427         ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
428                                          &line_info, &col_info);
429     if (ok) {
430         /*
431          *  Convert ST into a tuple representation.  Use Guido's function,
432          *  since it's known to work already.
433          */
434         res = node2tuple(self->st_node,
435                          PyList_New, PyList_SetItem, line_info, col_info);
436     }
437     return (res);
438 }
439 
440 
441 /*  parser_compilest(PyObject* self, PyObject* args)
442  *
443  *  This function creates code objects from the parse tree represented by
444  *  the passed-in data object.  An optional file name is passed in as well.
445  *
446  */
447 static PyObject*
parser_compilest(PyST_Object * self,PyObject * args,PyObject * kw)448 parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
449 {
450     PyObject*     res = NULL;
451     PyArena*      arena = NULL;
452     mod_ty        mod;
453     PyObject*     filename = NULL;
454     int ok;
455 
456     static char *keywords[] = {"st", "filename", NULL};
457 
458     if (self == NULL || PyModule_Check(self))
459         ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
460                                          &PyST_Type, &self,
461                                          PyUnicode_FSDecoder, &filename);
462     else
463         ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
464                                          PyUnicode_FSDecoder, &filename);
465     if (!ok)
466         goto error;
467 
468     if (filename == NULL) {
469         filename = PyUnicode_FromString("<syntax-tree>");
470         if (filename == NULL)
471             goto error;
472     }
473 
474     arena = PyArena_New();
475     if (!arena)
476         goto error;
477 
478     mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
479                                filename, arena);
480     if (!mod)
481         goto error;
482 
483     res = (PyObject *)PyAST_CompileObject(mod, filename,
484                                           &self->st_flags, -1, arena);
485 error:
486     Py_XDECREF(filename);
487     if (arena != NULL)
488         PyArena_Free(arena);
489     return res;
490 }
491 
492 
493 /*  PyObject* parser_isexpr(PyObject* self, PyObject* args)
494  *  PyObject* parser_issuite(PyObject* self, PyObject* args)
495  *
496  *  Checks the passed-in ST object to determine if it is an expression or
497  *  a statement suite, respectively.  The return is a Python truth value.
498  *
499  */
500 static PyObject*
parser_isexpr(PyST_Object * self,PyObject * args,PyObject * kw)501 parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
502 {
503     PyObject* res = 0;
504     int ok;
505 
506     static char *keywords[] = {"st", NULL};
507 
508     if (self == NULL || PyModule_Check(self))
509         ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
510                                          &PyST_Type, &self);
511     else
512         ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
513 
514     if (ok) {
515         /* Check to see if the ST represents an expression or not. */
516         res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
517         Py_INCREF(res);
518     }
519     return (res);
520 }
521 
522 
523 static PyObject*
parser_issuite(PyST_Object * self,PyObject * args,PyObject * kw)524 parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
525 {
526     PyObject* res = 0;
527     int ok;
528 
529     static char *keywords[] = {"st", NULL};
530 
531     if (self == NULL || PyModule_Check(self))
532         ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
533                                          &PyST_Type, &self);
534     else
535         ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
536 
537     if (ok) {
538         /* Check to see if the ST represents an expression or not. */
539         res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
540         Py_INCREF(res);
541     }
542     return (res);
543 }
544 
545 
546 /*  err_string(const char* message)
547  *
548  *  Sets the error string for an exception of type ParserError.
549  *
550  */
551 static void
err_string(const char * message)552 err_string(const char *message)
553 {
554     PyErr_SetString(parser_error, message);
555 }
556 
557 
558 /*  PyObject* parser_do_parse(PyObject* args, int type)
559  *
560  *  Internal function to actually execute the parse and return the result if
561  *  successful or set an exception if not.
562  *
563  */
564 static PyObject*
parser_do_parse(PyObject * args,PyObject * kw,const char * argspec,int type)565 parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
566 {
567     char*     string = 0;
568     PyObject* res    = 0;
569     int flags        = 0;
570     perrdetail err;
571 
572     static char *keywords[] = {"source", NULL};
573 
574     if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
575         node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL,
576                                                        &_PyParser_Grammar,
577                                                       (type == PyST_EXPR)
578                                                       ? eval_input : file_input,
579                                                       &err, &flags);
580 
581         if (n) {
582             res = parser_newstobject(n, type);
583             if (res) {
584                 ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK;
585                 ((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION;
586             }
587         }
588         else {
589             PyParser_SetError(&err);
590         }
591         PyParser_ClearError(&err);
592     }
593     return (res);
594 }
595 
596 
597 /*  PyObject* parser_expr(PyObject* self, PyObject* args)
598  *  PyObject* parser_suite(PyObject* self, PyObject* args)
599  *
600  *  External interfaces to the parser itself.  Which is called determines if
601  *  the parser attempts to recognize an expression ('eval' form) or statement
602  *  suite ('exec' form).  The real work is done by parser_do_parse() above.
603  *
604  */
605 static PyObject*
parser_expr(PyST_Object * self,PyObject * args,PyObject * kw)606 parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
607 {
608     NOTE(ARGUNUSED(self))
609     return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
610 }
611 
612 
613 static PyObject*
parser_suite(PyST_Object * self,PyObject * args,PyObject * kw)614 parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
615 {
616     NOTE(ARGUNUSED(self))
617     return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
618 }
619 
620 
621 
622 /*  This is the messy part of the code.  Conversion from a tuple to an ST
623  *  object requires that the input tuple be valid without having to rely on
624  *  catching an exception from the compiler.  This is done to allow the
625  *  compiler itself to remain fast, since most of its input will come from
626  *  the parser directly, and therefore be known to be syntactically correct.
627  *  This validation is done to ensure that we don't core dump the compile
628  *  phase, returning an exception instead.
629  *
630  *  Two aspects can be broken out in this code:  creating a node tree from
631  *  the tuple passed in, and verifying that it is indeed valid.  It may be
632  *  advantageous to expand the number of ST types to include funcdefs and
633  *  lambdadefs to take advantage of the optimizer, recognizing those STs
634  *  here.  They are not necessary, and not quite as useful in a raw form.
635  *  For now, let's get expressions and suites working reliably.
636  */
637 
638 
639 static node* build_node_tree(PyObject *tuple);
640 
641 static int
validate_node(node * tree)642 validate_node(node *tree)
643 {
644     int type = TYPE(tree);
645     int nch = NCH(tree);
646     state *dfa_state;
647     int pos, arc;
648 
649     assert(ISNONTERMINAL(type));
650     type -= NT_OFFSET;
651     if (type >= _PyParser_Grammar.g_ndfas) {
652         PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
653         return 0;
654     }
655     const dfa *nt_dfa = &_PyParser_Grammar.g_dfa[type];
656     REQ(tree, nt_dfa->d_type);
657 
658     /* Run the DFA for this nonterminal. */
659     dfa_state = nt_dfa->d_state;
660     for (pos = 0; pos < nch; ++pos) {
661         node *ch = CHILD(tree, pos);
662         int ch_type = TYPE(ch);
663         if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
664             || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
665             || (ch_type < 0)
666            ) {
667             PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
668             return 0;
669         }
670         if (ch_type == suite && TYPE(tree) == funcdef) {
671             /* This is the opposite hack of what we do in parser.c
672                (search for func_body_suite), except we don't ever
673                support type comments here. */
674             ch_type = func_body_suite;
675         }
676         for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
677             short a_label = dfa_state->s_arc[arc].a_lbl;
678             assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
679 
680             const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
681             if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type)
682                 && ((ch->n_str == NULL) || (label_str == NULL)
683                      || (strcmp(ch->n_str, label_str) == 0))
684                ) {
685                 /* The child is acceptable; if non-terminal, validate it recursively. */
686                 if (ISNONTERMINAL(ch_type) && !validate_node(ch))
687                     return 0;
688 
689                 /* Update the state, and move on to the next child. */
690                 dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
691                 goto arc_found;
692             }
693         }
694         /* What would this state have accepted? */
695         {
696             short a_label = dfa_state->s_arc->a_lbl;
697             if (!a_label) /* Wouldn't accept any more children */
698                 goto illegal_num_children;
699 
700             int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
701             const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
702 
703             if (ISNONTERMINAL(next_type)) {
704                 PyErr_Format(parser_error, "Expected %s, got %s.",
705                              _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
706                              ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
707                              _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
708             }
709             else if (expected_str != NULL) {
710                 PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
711                              expected_str);
712             }
713             else {
714                 PyErr_Format(parser_error, "Illegal terminal: expected %s.",
715                              _PyParser_TokenNames[next_type]);
716             }
717             return 0;
718         }
719 
720 arc_found:
721         continue;
722     }
723     /* Are we in a final state? If so, return 1 for successful validation. */
724     for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
725         if (!dfa_state->s_arc[arc].a_lbl) {
726             return 1;
727         }
728     }
729 
730 illegal_num_children:
731     PyErr_Format(parser_error,
732                  "Illegal number of children for %s node.", nt_dfa->d_name);
733     return 0;
734 }
735 
736 /*  PyObject* parser_tuple2st(PyObject* self, PyObject* args)
737  *
738  *  This is the public function, called from the Python code.  It receives a
739  *  single tuple object from the caller, and creates an ST object if the
740  *  tuple can be validated.  It does this by checking the first code of the
741  *  tuple, and, if acceptable, builds the internal representation.  If this
742  *  step succeeds, the internal representation is validated as fully as
743  *  possible with the recursive validate_node() routine defined above.
744  *
745  *  This function must be changed if support is to be added for PyST_FRAGMENT
746  *  ST objects.
747  *
748  */
749 static PyObject*
parser_tuple2st(PyST_Object * self,PyObject * args,PyObject * kw)750 parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
751 {
752     NOTE(ARGUNUSED(self))
753     PyObject *st = 0;
754     PyObject *tuple;
755     node *tree;
756 
757     static char *keywords[] = {"sequence", NULL};
758 
759     if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
760                                      &tuple))
761         return (0);
762     if (!PySequence_Check(tuple)) {
763         PyErr_SetString(PyExc_ValueError,
764                         "sequence2st() requires a single sequence argument");
765         return (0);
766     }
767     /*
768      *  Convert the tree to the internal form before checking it.
769      */
770     tree = build_node_tree(tuple);
771     if (tree != 0) {
772         node *validation_root = NULL;
773         int tree_type = 0;
774         switch (TYPE(tree)) {
775         case eval_input:
776             /*  Might be an eval form.  */
777             tree_type = PyST_EXPR;
778             validation_root = tree;
779             break;
780         case encoding_decl:
781             /* This looks like an encoding_decl so far. */
782             if (NCH(tree) == 1) {
783                 tree_type = PyST_SUITE;
784                 validation_root = CHILD(tree, 0);
785             }
786             else {
787                 err_string("Error Parsing encoding_decl");
788             }
789             break;
790         case file_input:
791             /*  This looks like an exec form so far.  */
792             tree_type = PyST_SUITE;
793             validation_root = tree;
794             break;
795         default:
796             /*  This is a fragment, at best. */
797             err_string("parse tree does not use a valid start symbol");
798         }
799 
800         if (validation_root != NULL && validate_node(validation_root))
801             st = parser_newstobject(tree, tree_type);
802         else
803             PyNode_Free(tree);
804     }
805     /*  Make sure we raise an exception on all errors.  We should never
806      *  get this, but we'd do well to be sure something is done.
807      */
808     if (st == NULL && !PyErr_Occurred())
809         err_string("unspecified ST error occurred");
810 
811     return st;
812 }
813 
814 
815 /*  node* build_node_children()
816  *
817  *  Iterate across the children of the current non-terminal node and build
818  *  their structures.  If successful, return the root of this portion of
819  *  the tree, otherwise, 0.  Any required exception will be specified already,
820  *  and no memory will have been deallocated.
821  *
822  */
823 static node*
build_node_children(PyObject * tuple,node * root,int * line_num)824 build_node_children(PyObject *tuple, node *root, int *line_num)
825 {
826     Py_ssize_t len = PyObject_Size(tuple);
827     Py_ssize_t i;
828     int  err;
829 
830     if (len < 0) {
831         return NULL;
832     }
833     for (i = 1; i < len; ++i) {
834         /* elem must always be a sequence, however simple */
835         PyObject* elem = PySequence_GetItem(tuple, i);
836         int ok = elem != NULL;
837         int type = 0;
838         char *strn = 0;
839 
840         if (ok)
841             ok = PySequence_Check(elem);
842         if (ok) {
843             PyObject *temp = PySequence_GetItem(elem, 0);
844             if (temp == NULL)
845                 ok = 0;
846             else {
847                 ok = PyLong_Check(temp);
848                 if (ok) {
849                     type = _PyLong_AsInt(temp);
850                     if (type == -1 && PyErr_Occurred()) {
851                         Py_DECREF(temp);
852                         Py_DECREF(elem);
853                         return NULL;
854                     }
855                 }
856                 Py_DECREF(temp);
857             }
858         }
859         if (!ok) {
860             PyObject *err = Py_BuildValue("Os", elem,
861                                           "Illegal node construct.");
862             PyErr_SetObject(parser_error, err);
863             Py_XDECREF(err);
864             Py_XDECREF(elem);
865             return NULL;
866         }
867         if (ISTERMINAL(type)) {
868             Py_ssize_t len = PyObject_Size(elem);
869             PyObject *temp;
870             const char *temp_str;
871 
872             if ((len != 2) && (len != 3)) {
873                 err_string("terminal nodes must have 2 or 3 entries");
874                 Py_DECREF(elem);
875                 return NULL;
876             }
877             temp = PySequence_GetItem(elem, 1);
878             if (temp == NULL) {
879                 Py_DECREF(elem);
880                 return NULL;
881             }
882             if (!PyUnicode_Check(temp)) {
883                 PyErr_Format(parser_error,
884                              "second item in terminal node must be a string,"
885                              " found %s",
886                              Py_TYPE(temp)->tp_name);
887                 Py_DECREF(temp);
888                 Py_DECREF(elem);
889                 return NULL;
890             }
891             if (len == 3) {
892                 PyObject *o = PySequence_GetItem(elem, 2);
893                 if (o == NULL) {
894                     Py_DECREF(temp);
895                     Py_DECREF(elem);
896                     return NULL;
897                 }
898                 if (PyLong_Check(o)) {
899                     int num = _PyLong_AsInt(o);
900                     if (num == -1 && PyErr_Occurred()) {
901                         Py_DECREF(o);
902                         Py_DECREF(temp);
903                         Py_DECREF(elem);
904                         return NULL;
905                     }
906                     *line_num = num;
907                 }
908                 else {
909                     PyErr_Format(parser_error,
910                                  "third item in terminal node must be an"
911                                  " integer, found %s",
912                                  Py_TYPE(temp)->tp_name);
913                     Py_DECREF(o);
914                     Py_DECREF(temp);
915                     Py_DECREF(elem);
916                     return NULL;
917                 }
918                 Py_DECREF(o);
919             }
920             temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
921             if (temp_str == NULL) {
922                 Py_DECREF(temp);
923                 Py_DECREF(elem);
924                 return NULL;
925             }
926             strn = (char *)PyObject_MALLOC(len + 1);
927             if (strn == NULL) {
928                 Py_DECREF(temp);
929                 Py_DECREF(elem);
930                 PyErr_NoMemory();
931                 return NULL;
932             }
933             (void) memcpy(strn, temp_str, len + 1);
934             Py_DECREF(temp);
935         }
936         else if (!ISNONTERMINAL(type)) {
937             /*
938              *  It has to be one or the other; this is an error.
939              *  Raise an exception.
940              */
941             PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
942             PyErr_SetObject(parser_error, err);
943             Py_XDECREF(err);
944             Py_DECREF(elem);
945             return NULL;
946         }
947         err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0);
948         if (err == E_NOMEM) {
949             Py_DECREF(elem);
950             PyObject_FREE(strn);
951             PyErr_NoMemory();
952             return NULL;
953         }
954         if (err == E_OVERFLOW) {
955             Py_DECREF(elem);
956             PyObject_FREE(strn);
957             PyErr_SetString(PyExc_ValueError,
958                             "unsupported number of child nodes");
959             return NULL;
960         }
961 
962         if (ISNONTERMINAL(type)) {
963             node* new_child = CHILD(root, i - 1);
964 
965             if (new_child != build_node_children(elem, new_child, line_num)) {
966                 Py_DECREF(elem);
967                 return NULL;
968             }
969         }
970         else if (type == NEWLINE) {     /* It's true:  we increment the     */
971             ++(*line_num);              /* line number *after* the newline! */
972         }
973         Py_DECREF(elem);
974     }
975     return root;
976 }
977 
978 
979 static node*
build_node_tree(PyObject * tuple)980 build_node_tree(PyObject *tuple)
981 {
982     node* res = 0;
983     PyObject *temp = PySequence_GetItem(tuple, 0);
984     long num = -1;
985 
986     if (temp != NULL)
987         num = PyLong_AsLong(temp);
988     Py_XDECREF(temp);
989     if (ISTERMINAL(num)) {
990         /*
991          *  The tuple is simple, but it doesn't start with a start symbol.
992          *  Raise an exception now and be done with it.
993          */
994         tuple = Py_BuildValue("Os", tuple,
995                     "Illegal syntax-tree; cannot start with terminal symbol.");
996         PyErr_SetObject(parser_error, tuple);
997         Py_XDECREF(tuple);
998     }
999     else if (ISNONTERMINAL(num)) {
1000         /*
1001          *  Not efficient, but that can be handled later.
1002          */
1003         int line_num = 0;
1004         PyObject *encoding = NULL;
1005 
1006         if (num == encoding_decl) {
1007             encoding = PySequence_GetItem(tuple, 2);
1008             if (encoding == NULL) {
1009                 PyErr_SetString(parser_error, "missed encoding");
1010                 return NULL;
1011             }
1012             if (!PyUnicode_Check(encoding)) {
1013                 PyErr_Format(parser_error,
1014                              "encoding must be a string, found %.200s",
1015                              Py_TYPE(encoding)->tp_name);
1016                 Py_DECREF(encoding);
1017                 return NULL;
1018             }
1019             /* tuple isn't borrowed anymore here, need to DECREF */
1020             tuple = PySequence_GetSlice(tuple, 0, 2);
1021             if (tuple == NULL) {
1022                 Py_DECREF(encoding);
1023                 return NULL;
1024             }
1025         }
1026         res = PyNode_New(num);
1027         if (res != NULL) {
1028             if (res != build_node_children(tuple, res, &line_num)) {
1029                 PyNode_Free(res);
1030                 res = NULL;
1031             }
1032             if (res && encoding) {
1033                 Py_ssize_t len;
1034                 const char *temp;
1035                 temp = PyUnicode_AsUTF8AndSize(encoding, &len);
1036                 if (temp == NULL) {
1037                     PyNode_Free(res);
1038                     Py_DECREF(encoding);
1039                     Py_DECREF(tuple);
1040                     return NULL;
1041                 }
1042                 res->n_str = (char *)PyObject_MALLOC(len + 1);
1043                 if (res->n_str == NULL) {
1044                     PyNode_Free(res);
1045                     Py_DECREF(encoding);
1046                     Py_DECREF(tuple);
1047                     PyErr_NoMemory();
1048                     return NULL;
1049                 }
1050                 (void) memcpy(res->n_str, temp, len + 1);
1051             }
1052         }
1053         if (encoding != NULL) {
1054             Py_DECREF(encoding);
1055             Py_DECREF(tuple);
1056         }
1057     }
1058     else {
1059         /*  The tuple is illegal -- if the number is neither TERMINAL nor
1060          *  NONTERMINAL, we can't use it.  Not sure the implementation
1061          *  allows this condition, but the API doesn't preclude it.
1062          */
1063         PyObject *err = Py_BuildValue("Os", tuple,
1064                                       "Illegal component tuple.");
1065         PyErr_SetObject(parser_error, err);
1066         Py_XDECREF(err);
1067     }
1068 
1069     return (res);
1070 }
1071 
1072 
1073 static PyObject*
1074 pickle_constructor = NULL;
1075 
1076 
1077 static PyObject*
parser__pickler(PyObject * self,PyObject * args)1078 parser__pickler(PyObject *self, PyObject *args)
1079 {
1080     NOTE(ARGUNUSED(self))
1081     PyObject *result = NULL;
1082     PyObject *st = NULL;
1083     PyObject *empty_dict = NULL;
1084 
1085     if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
1086         PyObject *newargs;
1087         PyObject *tuple;
1088 
1089         if ((empty_dict = PyDict_New()) == NULL)
1090             goto finally;
1091         if ((newargs = Py_BuildValue("Oi", st, 1)) == NULL)
1092             goto finally;
1093         tuple = parser_st2tuple((PyST_Object*)NULL, newargs, empty_dict);
1094         if (tuple != NULL) {
1095             result = Py_BuildValue("O(O)", pickle_constructor, tuple);
1096             Py_DECREF(tuple);
1097         }
1098         Py_DECREF(newargs);
1099     }
1100   finally:
1101     Py_XDECREF(empty_dict);
1102 
1103     return (result);
1104 }
1105 
1106 
1107 /*  Functions exported by this module.  Most of this should probably
1108  *  be converted into an ST object with methods, but that is better
1109  *  done directly in Python, allowing subclasses to be created directly.
1110  *  We'd really have to write a wrapper around it all anyway to allow
1111  *  inheritance.
1112  */
1113 static PyMethodDef parser_functions[] =  {
1114     {"compilest",      (PyCFunction)(void(*)(void))parser_compilest,  PUBLIC_METHOD_TYPE,
1115         PyDoc_STR("Compiles an ST object into a code object.")},
1116     {"expr",            (PyCFunction)(void(*)(void))parser_expr,      PUBLIC_METHOD_TYPE,
1117         PyDoc_STR("Creates an ST object from an expression.")},
1118     {"isexpr",          (PyCFunction)(void(*)(void))parser_isexpr,    PUBLIC_METHOD_TYPE,
1119         PyDoc_STR("Determines if an ST object was created from an expression.")},
1120     {"issuite",         (PyCFunction)(void(*)(void))parser_issuite,   PUBLIC_METHOD_TYPE,
1121         PyDoc_STR("Determines if an ST object was created from a suite.")},
1122     {"suite",           (PyCFunction)(void(*)(void))parser_suite,     PUBLIC_METHOD_TYPE,
1123         PyDoc_STR("Creates an ST object from a suite.")},
1124     {"sequence2st",     (PyCFunction)(void(*)(void))parser_tuple2st,  PUBLIC_METHOD_TYPE,
1125         PyDoc_STR("Creates an ST object from a tree representation.")},
1126     {"st2tuple",        (PyCFunction)(void(*)(void))parser_st2tuple,  PUBLIC_METHOD_TYPE,
1127         PyDoc_STR("Creates a tuple-tree representation of an ST.")},
1128     {"st2list",         (PyCFunction)(void(*)(void))parser_st2list,   PUBLIC_METHOD_TYPE,
1129         PyDoc_STR("Creates a list-tree representation of an ST.")},
1130     {"tuple2st",        (PyCFunction)(void(*)(void))parser_tuple2st,  PUBLIC_METHOD_TYPE,
1131         PyDoc_STR("Creates an ST object from a tree representation.")},
1132 
1133     /* private stuff: support pickle module */
1134     {"_pickler",        (PyCFunction)parser__pickler,  METH_VARARGS,
1135         PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
1136 
1137     {NULL, NULL, 0, NULL}
1138     };
1139 
1140 
1141 
1142 static struct PyModuleDef parsermodule = {
1143         PyModuleDef_HEAD_INIT,
1144         "parser",
1145         NULL,
1146         -1,
1147         parser_functions,
1148         NULL,
1149         NULL,
1150         NULL,
1151         NULL
1152 };
1153 
1154 PyMODINIT_FUNC PyInit_parser(void);  /* supply a prototype */
1155 
1156 PyMODINIT_FUNC
PyInit_parser(void)1157 PyInit_parser(void)
1158 {
1159     PyObject *module, *copyreg;
1160 
1161     if (PyType_Ready(&PyST_Type) < 0)
1162         return NULL;
1163     module = PyModule_Create(&parsermodule);
1164     if (module == NULL)
1165         return NULL;
1166 
1167     if (parser_error == 0)
1168         parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
1169 
1170     if (parser_error == 0)
1171         return NULL;
1172     /* CAUTION:  The code next used to skip bumping the refcount on
1173      * parser_error.  That's a disaster if PyInit_parser() gets called more
1174      * than once.  By incref'ing, we ensure that each module dict that
1175      * gets created owns its reference to the shared parser_error object,
1176      * and the file static parser_error vrbl owns a reference too.
1177      */
1178     Py_INCREF(parser_error);
1179     if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
1180         return NULL;
1181 
1182     Py_INCREF(&PyST_Type);
1183     PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
1184 
1185     PyModule_AddStringConstant(module, "__copyright__",
1186                                parser_copyright_string);
1187     PyModule_AddStringConstant(module, "__doc__",
1188                                parser_doc_string);
1189     PyModule_AddStringConstant(module, "__version__",
1190                                parser_version_string);
1191 
1192     /* Register to support pickling.
1193      * If this fails, the import of this module will fail because an
1194      * exception will be raised here; should we clear the exception?
1195      */
1196     copyreg = PyImport_ImportModuleNoBlock("copyreg");
1197     if (copyreg != NULL) {
1198         PyObject *func, *pickler;
1199         _Py_IDENTIFIER(pickle);
1200         _Py_IDENTIFIER(sequence2st);
1201         _Py_IDENTIFIER(_pickler);
1202 
1203         func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
1204         pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
1205         pickler = _PyObject_GetAttrId(module, &PyId__pickler);
1206         Py_XINCREF(pickle_constructor);
1207         if ((func != NULL) && (pickle_constructor != NULL)
1208             && (pickler != NULL)) {
1209             PyObject *res;
1210 
1211             res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
1212                                                pickle_constructor, NULL);
1213             Py_XDECREF(res);
1214         }
1215         Py_XDECREF(func);
1216         Py_XDECREF(pickle_constructor);
1217         Py_XDECREF(pickler);
1218         Py_DECREF(copyreg);
1219     }
1220     return module;
1221 }
1222