1 #include "Python.h"
2 #include "../Include/Python-ast.h"
3 #include "../Include/compile.h"
4 #include "../Include/node.h"
5 #include "../Include/grammar.h"
6 #include "../Include/token.h"
7 #include "../Include/ast.h"
8 #include "../Include/parsetok.h"
9 #include "../Include/errcode.h"
10 #include "../Include/graminit.h"
11 
12 extern grammar _Ta27Parser_Grammar; /* from graminit.c */
13 
14 // from Python/bltinmodule.c
15 static const char *
source_as_string(PyObject * cmd,const char * funcname,const char * what,PyCompilerFlags * cf,PyObject ** cmd_copy)16 source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
17 {
18     const char *str;
19     Py_ssize_t size;
20     Py_buffer view;
21 
22     *cmd_copy = NULL;
23     if (PyUnicode_Check(cmd)) {
24         cf->cf_flags |= PyCF_IGNORE_COOKIE;
25         str = PyUnicode_AsUTF8AndSize(cmd, &size);
26         if (str == NULL)
27             return NULL;
28     }
29     else if (PyBytes_Check(cmd)) {
30         str = PyBytes_AS_STRING(cmd);
31         size = PyBytes_GET_SIZE(cmd);
32     }
33     else if (PyByteArray_Check(cmd)) {
34         str = PyByteArray_AS_STRING(cmd);
35         size = PyByteArray_GET_SIZE(cmd);
36     }
37     else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) {
38         /* Copy to NUL-terminated buffer. */
39         *cmd_copy = PyBytes_FromStringAndSize(
40             (const char *)view.buf, view.len);
41         PyBuffer_Release(&view);
42         if (*cmd_copy == NULL) {
43             return NULL;
44         }
45         str = PyBytes_AS_STRING(*cmd_copy);
46         size = PyBytes_GET_SIZE(*cmd_copy);
47     }
48     else {
49         PyErr_Format(PyExc_TypeError,
50           "%s() arg 1 must be a %s object",
51           funcname, what);
52         return NULL;
53     }
54 
55     if (strlen(str) != (size_t)size) {
56         PyErr_SetString(PyExc_ValueError,
57                         "source code string cannot contain null bytes");
58         Py_CLEAR(*cmd_copy);
59         return NULL;
60     }
61     return str;
62 }
63 
64 // from Python/pythonrun.c
65 /* compute parser flags based on compiler flags */
PARSER_FLAGS(PyCompilerFlags * flags)66 static int PARSER_FLAGS(PyCompilerFlags *flags)
67 {
68     int parser_flags = 0;
69     if (!flags)
70         return 0;
71     if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
72         parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
73     if (flags->cf_flags & PyCF_IGNORE_COOKIE)
74         parser_flags |= PyPARSE_IGNORE_COOKIE;
75     return parser_flags;
76 }
77 
78 // from Python/pythonrun.c
79 /* Set the error appropriate to the given input error code (see errcode.h) */
80 static void
err_input(perrdetail * err)81 err_input(perrdetail *err)
82 {
83     PyObject *v, *w, *errtype, *errtext;
84     PyObject *msg_obj = NULL;
85     char *msg = NULL;
86     int offset = err->offset;
87 
88     errtype = PyExc_SyntaxError;
89     switch (err->error) {
90     case E_ERROR:
91         return;
92     case E_SYNTAX:
93         errtype = PyExc_IndentationError;
94         if (err->expected == INDENT)
95             msg = "expected an indented block";
96         else if (err->token == INDENT)
97             msg = "unexpected indent";
98         else if (err->token == DEDENT)
99             msg = "unexpected unindent";
100         else {
101             errtype = PyExc_SyntaxError;
102             if (err->token == TYPE_COMMENT)
103               msg = "misplaced type annotation";
104             else
105               msg = "invalid syntax";
106         }
107         break;
108     case E_TOKEN:
109         msg = "invalid token";
110         break;
111     case E_EOFS:
112         msg = "EOF while scanning triple-quoted string literal";
113         break;
114     case E_EOLS:
115         msg = "EOL while scanning string literal";
116         break;
117     case E_INTR:
118         if (!PyErr_Occurred())
119             PyErr_SetNone(PyExc_KeyboardInterrupt);
120         goto cleanup;
121     case E_NOMEM:
122         PyErr_NoMemory();
123         goto cleanup;
124     case E_EOF:
125         msg = "unexpected EOF while parsing";
126         break;
127     case E_TABSPACE:
128         errtype = PyExc_TabError;
129         msg = "inconsistent use of tabs and spaces in indentation";
130         break;
131     case E_OVERFLOW:
132         msg = "expression too long";
133         break;
134     case E_DEDENT:
135         errtype = PyExc_IndentationError;
136         msg = "unindent does not match any outer indentation level";
137         break;
138     case E_TOODEEP:
139         errtype = PyExc_IndentationError;
140         msg = "too many levels of indentation";
141         break;
142     case E_DECODE: {
143         PyObject *type, *value, *tb;
144         PyErr_Fetch(&type, &value, &tb);
145         msg = "unknown decode error";
146         if (value != NULL)
147             msg_obj = PyObject_Str(value);
148         Py_XDECREF(type);
149         Py_XDECREF(value);
150         Py_XDECREF(tb);
151         break;
152     }
153     case E_LINECONT:
154         msg = "unexpected character after line continuation character";
155         break;
156     default:
157         fprintf(stderr, "error=%d\n", err->error);
158         msg = "unknown parsing error";
159         break;
160     }
161     /* err->text may not be UTF-8 in case of decoding errors.
162        Explicitly convert to an object. */
163     if (!err->text) {
164         errtext = Py_None;
165         Py_INCREF(Py_None);
166     } else {
167         errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
168                                        "replace");
169         if (errtext != NULL) {
170             Py_ssize_t len = strlen(err->text);
171             offset = (int)PyUnicode_GET_LENGTH(errtext);
172             if (len != err->offset) {
173                 Py_DECREF(errtext);
174                 errtext = PyUnicode_DecodeUTF8(err->text, len,
175                                                "replace");
176             }
177         }
178     }
179     v = Py_BuildValue("(OiiN)", err->filename,
180                       err->lineno, offset, errtext);
181     if (v != NULL) {
182         if (msg_obj)
183             w = Py_BuildValue("(OO)", msg_obj, v);
184         else
185             w = Py_BuildValue("(sO)", msg, v);
186     } else
187         w = NULL;
188     Py_XDECREF(v);
189     PyErr_SetObject(errtype, w);
190     Py_XDECREF(w);
191 cleanup:
192     Py_XDECREF(msg_obj);
193     if (err->text != NULL) {
194         PyObject_FREE(err->text);
195         err->text = NULL;
196     }
197 }
198 
199 // from Python/pythonrun.c
200 static void
err_free(perrdetail * err)201 err_free(perrdetail *err)
202 {
203     Py_CLEAR(err->filename);
204 }
205 
206 // copy of PyParser_ASTFromStringObject in Python/pythonrun.c
207 /* Preferred access to parser is through AST. */
208 static mod_ty
string_object_to_c_ast(const char * s,PyObject * filename,int start,PyCompilerFlags * flags,PyArena * arena)209 string_object_to_c_ast(const char *s, PyObject *filename, int start,
210                              PyCompilerFlags *flags, PyArena *arena)
211 {
212     mod_ty mod;
213     PyCompilerFlags localflags;
214     perrdetail err;
215     int iflags = PARSER_FLAGS(flags);
216 
217     node *n = Ta27Parser_ParseStringObject(s, filename,
218                                          &_Ta27Parser_Grammar, start, &err,
219                                          &iflags);
220     if (flags == NULL) {
221         localflags.cf_flags = 0;
222         flags = &localflags;
223     }
224     if (n) {
225         flags->cf_flags |= iflags & PyCF_MASK;
226         mod = Ta27AST_FromNode(n, flags, PyUnicode_AsUTF8(filename), arena);
227         Ta27Node_Free(n);
228     }
229     else {
230         err_input(&err);
231         mod = NULL;
232     }
233     err_free(&err);
234     return mod;
235 }
236 
237 // adapted from Py_CompileStringObject in Python/pythonrun.c
238 static PyObject *
string_object_to_py_ast(const char * str,PyObject * filename,int start,PyCompilerFlags * flags)239 string_object_to_py_ast(const char *str, PyObject *filename, int start,
240                        PyCompilerFlags *flags)
241 {
242     mod_ty mod;
243     PyObject *result;
244     PyArena *arena = PyArena_New();
245     if (arena == NULL)
246         return NULL;
247 
248     mod = string_object_to_c_ast(str, filename, start, flags, arena);
249     if (mod == NULL) {
250         PyArena_Free(arena);
251         return NULL;
252     }
253 
254     result = Ta27AST_mod2obj(mod);
255     PyArena_Free(arena);
256     return result;
257 }
258 
259 // adapted from builtin_compile_impl in Python/bltinmodule.c
260 static PyObject *
ast27_parse_impl(PyObject * source,PyObject * filename,const char * mode)261 ast27_parse_impl(PyObject *source,
262                  PyObject *filename, const char *mode)
263 {
264     PyObject *source_copy;
265     const char *str;
266     int compile_mode = -1;
267     PyCompilerFlags cf;
268     int start[] = {file_input, eval_input, single_input, func_type_input };
269     PyObject *result;
270 
271     cf.cf_flags = PyCF_ONLY_AST | PyCF_SOURCE_IS_UTF8;
272 
273     if (strcmp(mode, "exec") == 0)
274         compile_mode = 0;
275     else if (strcmp(mode, "eval") == 0)
276         compile_mode = 1;
277     else if (strcmp(mode, "single") == 0)
278         compile_mode = 2;
279     else if (strcmp(mode, "func_type") == 0)
280         compile_mode = 3;
281     else {
282         PyErr_SetString(PyExc_ValueError,
283                         "parse() mode must be 'exec', 'eval', 'single', for 'func_type'");
284         goto error;
285     }
286 
287     str = source_as_string(source, "parse", "string or bytes", &cf, &source_copy);
288     if (str == NULL)
289         goto error;
290 
291     result = string_object_to_py_ast(str, filename, start[compile_mode], &cf);
292     Py_XDECREF(source_copy);
293     goto finally;
294 
295 error:
296     result = NULL;
297 finally:
298     Py_DECREF(filename);
299     return result;
300 }
301 
302 // adapted from builtin_compile in Python/clinic/bltinmodule.c.h
303 PyObject *
ast27_parse(PyObject * self,PyObject * args)304 ast27_parse(PyObject *self, PyObject *args)
305 {
306     PyObject *return_value = NULL;
307     PyObject *source;
308     PyObject *filename;
309     const char *mode;
310 
311     if (PyArg_ParseTuple(args, "OO&s:parse", &source, PyUnicode_FSDecoder, &filename, &mode))
312         return_value = ast27_parse_impl(source, filename, mode);
313 
314     return return_value;
315 }
316