1
2 /* Parser-tokenizer link implementation */
3
4 #include "Python.h"
5 #include "tokenizer.h"
6 #include "node.h"
7 #include "grammar.h"
8 #include "parser.h"
9 #include "parsetok.h"
10 #include "errcode.h"
11 #include "graminit.h"
12
13
14 /* Forward */
15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16 static int initerr(perrdetail *err_ret, PyObject * filename);
17
/* A growable array of (line number, comment text) pairs.  parsetok() uses
   it to collect the text of TYPE_IGNORE tokens while tokenizing. */
typedef struct {
    struct {
        int lineno;       /* line number recorded for the comment */
        char *comment;    /* comment text; not copied by the array */
    } *items;
    size_t size;          /* allocated capacity of 'items', in elements */
    size_t num_items;     /* number of elements of 'items' in use */
} growable_comment_array;

/* Initialize 'arr' as an empty array with room for 'initial_size' entries.
   'initial_size' must be greater than zero.
   Returns 1 on success, 0 if the backing storage could not be allocated. */
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = malloc(initial_size * sizeof(*arr->items));
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

/* Append the pair (lineno, comment) to 'arr', doubling the capacity when the
   array is full.  The 'comment' pointer is stored as-is, not copied.
   Returns 1 on success.  Returns 0 if the array could not be grown; in that
   case 'arr' is left exactly as it was (same storage, size and contents) and
   'comment' has not been stored, so the caller still owns it. */
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        size_t new_size = arr->size * 2;
        /* Keep the old pointer until realloc() is known to have succeeded:
           on failure realloc() leaves the original block allocated, and
           overwriting arr->items with NULL would both leak it and leave
           num_items entries pointing at nothing. */
        void *new_items = realloc(arr->items, new_size * sizeof(*arr->items));
        if (new_items == NULL) {
            return 0;
        }
        arr->items = new_items;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;
    arr->num_items++;
    return 1;
}
52
53 static void
growable_comment_array_deallocate(growable_comment_array * arr)54 growable_comment_array_deallocate(growable_comment_array *arr) {
55 for (unsigned i = 0; i < arr->num_items; i++) {
56 PyObject_FREE(arr->items[i].comment);
57 }
58 free(arr->items);
59 }
60
61 /* Parse input coming from a string. Return error code, print some errors. */
62 node *
PyParser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)63 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
64 {
65 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
66 }
67
68 node *
PyParser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)69 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
70 perrdetail *err_ret, int flags)
71 {
72 return PyParser_ParseStringFlagsFilename(s, NULL,
73 g, start, err_ret, flags);
74 }
75
76 node *
PyParser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)77 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
78 grammar *g, int start,
79 perrdetail *err_ret, int flags)
80 {
81 int iflags = flags;
82 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
83 err_ret, &iflags);
84 }
85
86 node *
PyParser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)87 PyParser_ParseStringObject(const char *s, PyObject *filename,
88 grammar *g, int start,
89 perrdetail *err_ret, int *flags)
90 {
91 struct tok_state *tok;
92 int exec_input = start == file_input;
93
94 if (initerr(err_ret, filename) < 0)
95 return NULL;
96
97 if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
98 err_ret->error = E_ERROR;
99 return NULL;
100 }
101
102 if (*flags & PyPARSE_IGNORE_COOKIE)
103 tok = PyTokenizer_FromUTF8(s, exec_input);
104 else
105 tok = PyTokenizer_FromString(s, exec_input);
106 if (tok == NULL) {
107 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
108 return NULL;
109 }
110 if (*flags & PyPARSE_TYPE_COMMENTS) {
111 tok->type_comments = 1;
112 }
113
114 Py_INCREF(err_ret->filename);
115 tok->filename = err_ret->filename;
116 if (*flags & PyPARSE_ASYNC_HACKS)
117 tok->async_hacks = 1;
118 return parsetok(tok, g, start, err_ret, flags);
119 }
120
121 node *
PyParser_ParseStringFlagsFilenameEx(const char * s,const char * filename_str,grammar * g,int start,perrdetail * err_ret,int * flags)122 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
123 grammar *g, int start,
124 perrdetail *err_ret, int *flags)
125 {
126 node *n;
127 PyObject *filename = NULL;
128 if (filename_str != NULL) {
129 filename = PyUnicode_DecodeFSDefault(filename_str);
130 if (filename == NULL) {
131 err_ret->error = E_ERROR;
132 return NULL;
133 }
134 }
135 n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
136 Py_XDECREF(filename);
137 return n;
138 }
139
140 /* Parse input coming from a file. Return error code, print some errors. */
141
142 node *
PyParser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret)143 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
144 const char *ps1, const char *ps2,
145 perrdetail *err_ret)
146 {
147 return PyParser_ParseFileFlags(fp, filename, NULL,
148 g, start, ps1, ps2, err_ret, 0);
149 }
150
151 node *
PyParser_ParseFileFlags(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int flags)152 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
153 grammar *g, int start,
154 const char *ps1, const char *ps2,
155 perrdetail *err_ret, int flags)
156 {
157 int iflags = flags;
158 return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
159 ps2, err_ret, &iflags);
160 }
161
162 node *
PyParser_ParseFileObject(FILE * fp,PyObject * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)163 PyParser_ParseFileObject(FILE *fp, PyObject *filename,
164 const char *enc, grammar *g, int start,
165 const char *ps1, const char *ps2,
166 perrdetail *err_ret, int *flags)
167 {
168 struct tok_state *tok;
169
170 if (initerr(err_ret, filename) < 0)
171 return NULL;
172
173 if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
174 return NULL;
175 }
176
177 if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
178 err_ret->error = E_NOMEM;
179 return NULL;
180 }
181 if (*flags & PyPARSE_TYPE_COMMENTS) {
182 tok->type_comments = 1;
183 }
184 Py_INCREF(err_ret->filename);
185 tok->filename = err_ret->filename;
186 return parsetok(tok, g, start, err_ret, flags);
187 }
188
189 node *
PyParser_ParseFileFlagsEx(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)190 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
191 const char *enc, grammar *g, int start,
192 const char *ps1, const char *ps2,
193 perrdetail *err_ret, int *flags)
194 {
195 node *n;
196 PyObject *fileobj = NULL;
197 if (filename != NULL) {
198 fileobj = PyUnicode_DecodeFSDefault(filename);
199 if (fileobj == NULL) {
200 err_ret->error = E_ERROR;
201 return NULL;
202 }
203 }
204 n = PyParser_ParseFileObject(fp, fileobj, enc, g,
205 start, ps1, ps2, err_ret, flags);
206 Py_XDECREF(fileobj);
207 return n;
208 }
209
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Everything between this "#if 0" and its "#endif" is never compiled.
   It holds two warning format strings and a helper that prints one of
   them to stderr. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning 'msg' (a format taking a filename and a line number)
   to stderr; a NULL 'filename' is reported as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
227
228 /* Parse input coming from the given tokenizer structure.
229 Return error code. */
230
231 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)232 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
233 int *flags)
234 {
235 parser_state *ps;
236 node *n;
237 int started = 0;
238 int col_offset, end_col_offset;
239 growable_comment_array type_ignores;
240
241 if (!growable_comment_array_init(&type_ignores, 10)) {
242 err_ret->error = E_NOMEM;
243 PyTokenizer_Free(tok);
244 return NULL;
245 }
246
247 if ((ps = PyParser_New(g, start)) == NULL) {
248 err_ret->error = E_NOMEM;
249 growable_comment_array_deallocate(&type_ignores);
250 PyTokenizer_Free(tok);
251 return NULL;
252 }
253 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
254 if (*flags & PyPARSE_BARRY_AS_BDFL)
255 ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
256 if (*flags & PyPARSE_TYPE_COMMENTS)
257 ps->p_flags |= PyCF_TYPE_COMMENTS;
258 #endif
259
260 for (;;) {
261 char *a, *b;
262 int type;
263 size_t len;
264 char *str;
265 col_offset = -1;
266 int lineno;
267 const char *line_start;
268
269 type = PyTokenizer_Get(tok, &a, &b);
270 if (type == ERRORTOKEN) {
271 err_ret->error = tok->done;
272 break;
273 }
274 if (type == ENDMARKER && started) {
275 type = NEWLINE; /* Add an extra newline */
276 started = 0;
277 /* Add the right number of dedent tokens,
278 except if a certain flag is given --
279 codeop.py uses this. */
280 if (tok->indent &&
281 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
282 {
283 tok->pendin = -tok->indent;
284 tok->indent = 0;
285 }
286 }
287 else
288 started = 1;
289 len = (a != NULL && b != NULL) ? b - a : 0;
290 str = (char *) PyObject_MALLOC(len + 1);
291 if (str == NULL) {
292 err_ret->error = E_NOMEM;
293 break;
294 }
295 if (len > 0)
296 strncpy(str, a, len);
297 str[len] = '\0';
298
299 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
300 if (type == NOTEQUAL) {
301 if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
302 strcmp(str, "!=")) {
303 PyObject_FREE(str);
304 err_ret->error = E_SYNTAX;
305 break;
306 }
307 else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
308 strcmp(str, "<>")) {
309 PyObject_FREE(str);
310 err_ret->expected = NOTEQUAL;
311 err_ret->error = E_SYNTAX;
312 break;
313 }
314 }
315 #endif
316
317 /* Nodes of type STRING, especially multi line strings
318 must be handled differently in order to get both
319 the starting line number and the column offset right.
320 (cf. issue 16806) */
321 lineno = type == STRING ? tok->first_lineno : tok->lineno;
322 line_start = type == STRING ? tok->multi_line_start : tok->line_start;
323 if (a != NULL && a >= line_start) {
324 col_offset = Py_SAFE_DOWNCAST(a - line_start,
325 intptr_t, int);
326 }
327 else {
328 col_offset = -1;
329 }
330
331 if (b != NULL && b >= tok->line_start) {
332 end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
333 intptr_t, int);
334 }
335 else {
336 end_col_offset = -1;
337 }
338
339 if (type == TYPE_IGNORE) {
340 if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
341 err_ret->error = E_NOMEM;
342 break;
343 }
344 continue;
345 }
346
347 if ((err_ret->error =
348 PyParser_AddToken(ps, (int)type, str,
349 lineno, col_offset, tok->lineno, end_col_offset,
350 &(err_ret->expected))) != E_OK) {
351 if (err_ret->error != E_DONE) {
352 PyObject_FREE(str);
353 err_ret->token = type;
354 }
355 break;
356 }
357 }
358
359 if (err_ret->error == E_DONE) {
360 n = ps->p_tree;
361 ps->p_tree = NULL;
362
363 if (n->n_type == file_input) {
364 /* Put type_ignore nodes in the ENDMARKER of file_input. */
365 int num;
366 node *ch;
367 size_t i;
368
369 num = NCH(n);
370 ch = CHILD(n, num - 1);
371 REQ(ch, ENDMARKER);
372
373 for (i = 0; i < type_ignores.num_items; i++) {
374 int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
375 type_ignores.items[i].lineno, 0,
376 type_ignores.items[i].lineno, 0);
377 if (res != 0) {
378 err_ret->error = res;
379 PyNode_Free(n);
380 n = NULL;
381 break;
382 }
383 type_ignores.items[i].comment = NULL;
384 }
385 }
386
387 /* Check that the source for a single input statement really
388 is a single statement by looking at what is left in the
389 buffer after parsing. Trailing whitespace and comments
390 are OK. */
391 if (err_ret->error == E_DONE && start == single_input) {
392 char *cur = tok->cur;
393 char c = *tok->cur;
394
395 for (;;) {
396 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
397 c = *++cur;
398
399 if (!c)
400 break;
401
402 if (c != '#') {
403 err_ret->error = E_BADSINGLE;
404 PyNode_Free(n);
405 n = NULL;
406 break;
407 }
408
409 /* Suck up comment. */
410 while (c && c != '\n')
411 c = *++cur;
412 }
413 }
414 }
415 else
416 n = NULL;
417
418 growable_comment_array_deallocate(&type_ignores);
419
420 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
421 *flags = ps->p_flags;
422 #endif
423 PyParser_Delete(ps);
424
425 if (n == NULL) {
426 if (tok->done == E_EOF)
427 err_ret->error = E_EOF;
428 err_ret->lineno = tok->lineno;
429 if (tok->buf != NULL) {
430 size_t len;
431 assert(tok->cur - tok->buf < INT_MAX);
432 /* if we've managed to parse a token, point the offset to its start,
433 * else use the current reading position of the tokenizer
434 */
435 err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
436 len = tok->inp - tok->buf;
437 err_ret->text = (char *) PyObject_MALLOC(len + 1);
438 if (err_ret->text != NULL) {
439 if (len > 0)
440 strncpy(err_ret->text, tok->buf, len);
441 err_ret->text[len] = '\0';
442 }
443 }
444 } else if (tok->encoding != NULL) {
445 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
446 * allocated using PyMem_
447 */
448 node* r = PyNode_New(encoding_decl);
449 if (r)
450 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
451 if (!r || !r->n_str) {
452 err_ret->error = E_NOMEM;
453 if (r)
454 PyObject_FREE(r);
455 n = NULL;
456 goto done;
457 }
458 strcpy(r->n_str, tok->encoding);
459 PyMem_FREE(tok->encoding);
460 tok->encoding = NULL;
461 r->n_nchildren = 1;
462 r->n_child = n;
463 n = r;
464 }
465
466 done:
467 PyTokenizer_Free(tok);
468
469 if (n != NULL) {
470 _PyNode_FinalizeEndPos(n);
471 }
472 return n;
473 }
474
475 static int
initerr(perrdetail * err_ret,PyObject * filename)476 initerr(perrdetail *err_ret, PyObject *filename)
477 {
478 err_ret->error = E_OK;
479 err_ret->lineno = 0;
480 err_ret->offset = 0;
481 err_ret->text = NULL;
482 err_ret->token = -1;
483 err_ret->expected = -1;
484 if (filename) {
485 Py_INCREF(filename);
486 err_ret->filename = filename;
487 }
488 else {
489 err_ret->filename = PyUnicode_FromString("<string>");
490 if (err_ret->filename == NULL) {
491 err_ret->error = E_ERROR;
492 return -1;
493 }
494 }
495 return 0;
496 }
497