1 
2 /* Parser-tokenizer link implementation */
3 
4 #include "Python.h"
5 #include "tokenizer.h"
6 #include "node.h"
7 #include "grammar.h"
8 #include "parser.h"
9 #include "parsetok.h"
10 #include "errcode.h"
11 #include "graminit.h"
12 
13 
14 /* Forward */
15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16 static int initerr(perrdetail *err_ret, PyObject * filename);
17 
/* One recorded comment: the line number it was seen on and a pointer to
   its text. */
struct growable_comment_item {
    int lineno;
    char *comment;
};

/* Growable array of recorded comments.
   `items` points at the backing storage, `size` is the number of slots
   allocated in it and `num_items` is how many of those slots are in use. */
typedef struct {
    struct growable_comment_item *items;
    size_t size;
    size_t num_items;
} growable_comment_array;
26 
27 static int
growable_comment_array_init(growable_comment_array * arr,size_t initial_size)28 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
29     assert(initial_size > 0);
30     arr->items = malloc(initial_size * sizeof(*arr->items));
31     arr->size = initial_size;
32     arr->num_items = 0;
33 
34     return arr->items != NULL;
35 }
36 
37 static int
growable_comment_array_add(growable_comment_array * arr,int lineno,char * comment)38 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
39     if (arr->num_items >= arr->size) {
40         arr->size *= 2;
41         arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
42         if (!arr->items) {
43             return 0;
44         }
45     }
46 
47     arr->items[arr->num_items].lineno = lineno;
48     arr->items[arr->num_items].comment = comment;
49     arr->num_items++;
50     return 1;
51 }
52 
53 static void
growable_comment_array_deallocate(growable_comment_array * arr)54 growable_comment_array_deallocate(growable_comment_array *arr) {
55     for (unsigned i = 0; i < arr->num_items; i++) {
56         PyObject_FREE(arr->items[i].comment);
57     }
58     free(arr->items);
59 }
60 
61 /* Parse input coming from a string.  Return error code, print some errors. */
62 node *
PyParser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)63 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
64 {
65     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
66 }
67 
68 node *
PyParser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)69 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
70                           perrdetail *err_ret, int flags)
71 {
72     return PyParser_ParseStringFlagsFilename(s, NULL,
73                                              g, start, err_ret, flags);
74 }
75 
76 node *
PyParser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)77 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
78                           grammar *g, int start,
79                           perrdetail *err_ret, int flags)
80 {
81     int iflags = flags;
82     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
83                                                err_ret, &iflags);
84 }
85 
86 node *
PyParser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)87 PyParser_ParseStringObject(const char *s, PyObject *filename,
88                            grammar *g, int start,
89                            perrdetail *err_ret, int *flags)
90 {
91     struct tok_state *tok;
92     int exec_input = start == file_input;
93 
94     if (initerr(err_ret, filename) < 0)
95         return NULL;
96 
97     if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
98         err_ret->error = E_ERROR;
99         return NULL;
100     }
101 
102     if (*flags & PyPARSE_IGNORE_COOKIE)
103         tok = PyTokenizer_FromUTF8(s, exec_input);
104     else
105         tok = PyTokenizer_FromString(s, exec_input);
106     if (tok == NULL) {
107         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
108         return NULL;
109     }
110     if (*flags & PyPARSE_TYPE_COMMENTS) {
111         tok->type_comments = 1;
112     }
113 
114     Py_INCREF(err_ret->filename);
115     tok->filename = err_ret->filename;
116     if (*flags & PyPARSE_ASYNC_HACKS)
117         tok->async_hacks = 1;
118     return parsetok(tok, g, start, err_ret, flags);
119 }
120 
121 node *
PyParser_ParseStringFlagsFilenameEx(const char * s,const char * filename_str,grammar * g,int start,perrdetail * err_ret,int * flags)122 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
123                           grammar *g, int start,
124                           perrdetail *err_ret, int *flags)
125 {
126     node *n;
127     PyObject *filename = NULL;
128     if (filename_str != NULL) {
129         filename = PyUnicode_DecodeFSDefault(filename_str);
130         if (filename == NULL) {
131             err_ret->error = E_ERROR;
132             return NULL;
133         }
134     }
135     n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
136     Py_XDECREF(filename);
137     return n;
138 }
139 
140 /* Parse input coming from a file.  Return error code, print some errors. */
141 
142 node *
PyParser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret)143 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
144                    const char *ps1, const char *ps2,
145                    perrdetail *err_ret)
146 {
147     return PyParser_ParseFileFlags(fp, filename, NULL,
148                                    g, start, ps1, ps2, err_ret, 0);
149 }
150 
151 node *
PyParser_ParseFileFlags(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int flags)152 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
153                         grammar *g, int start,
154                         const char *ps1, const char *ps2,
155                         perrdetail *err_ret, int flags)
156 {
157     int iflags = flags;
158     return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
159                                      ps2, err_ret, &iflags);
160 }
161 
162 node *
PyParser_ParseFileObject(FILE * fp,PyObject * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)163 PyParser_ParseFileObject(FILE *fp, PyObject *filename,
164                          const char *enc, grammar *g, int start,
165                          const char *ps1, const char *ps2,
166                          perrdetail *err_ret, int *flags)
167 {
168     struct tok_state *tok;
169 
170     if (initerr(err_ret, filename) < 0)
171         return NULL;
172 
173     if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
174         return NULL;
175     }
176 
177     if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
178         err_ret->error = E_NOMEM;
179         return NULL;
180     }
181     if (*flags & PyPARSE_TYPE_COMMENTS) {
182         tok->type_comments = 1;
183     }
184     Py_INCREF(err_ret->filename);
185     tok->filename = err_ret->filename;
186     return parsetok(tok, g, start, err_ret, flags);
187 }
188 
189 node *
PyParser_ParseFileFlagsEx(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)190 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
191                           const char *enc, grammar *g, int start,
192                           const char *ps1, const char *ps2,
193                           perrdetail *err_ret, int *flags)
194 {
195     node *n;
196     PyObject *fileobj = NULL;
197     if (filename != NULL) {
198         fileobj = PyUnicode_DecodeFSDefault(filename);
199         if (fileobj == NULL) {
200             err_ret->error = E_ERROR;
201             return NULL;
202         }
203     }
204     n = PyParser_ParseFileObject(fp, fileobj, enc, g,
205                                  start, ps1, ps2, err_ret, flags);
206     Py_XDECREF(fileobj);
207     return n;
208 }
209 
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* Everything between the `#if 0` below and its matching #endif is
   excluded from compilation, whether or not the outer macro is
   defined. */
#if 0
/* Format strings for warn(): filename, then line number. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write `msg` to stderr through PySys_WriteStderr(), passing `filename`
   and `lineno` as its format arguments.  A NULL `filename` is replaced
   by "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
227 
228 /* Parse input coming from the given tokenizer structure.
229    Return error code. */
230 
231 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)232 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
233          int *flags)
234 {
235     parser_state *ps;
236     node *n;
237     int started = 0;
238     int col_offset, end_col_offset;
239     growable_comment_array type_ignores;
240 
241     if (!growable_comment_array_init(&type_ignores, 10)) {
242         err_ret->error = E_NOMEM;
243         PyTokenizer_Free(tok);
244         return NULL;
245     }
246 
247     if ((ps = PyParser_New(g, start)) == NULL) {
248         err_ret->error = E_NOMEM;
249         growable_comment_array_deallocate(&type_ignores);
250         PyTokenizer_Free(tok);
251         return NULL;
252     }
253 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
254     if (*flags & PyPARSE_BARRY_AS_BDFL)
255         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
256     if (*flags & PyPARSE_TYPE_COMMENTS)
257         ps->p_flags |= PyCF_TYPE_COMMENTS;
258 #endif
259 
260     for (;;) {
261         char *a, *b;
262         int type;
263         size_t len;
264         char *str;
265         col_offset = -1;
266         int lineno;
267         const char *line_start;
268 
269         type = PyTokenizer_Get(tok, &a, &b);
270         if (type == ERRORTOKEN) {
271             err_ret->error = tok->done;
272             break;
273         }
274         if (type == ENDMARKER && started) {
275             type = NEWLINE; /* Add an extra newline */
276             started = 0;
277             /* Add the right number of dedent tokens,
278                except if a certain flag is given --
279                codeop.py uses this. */
280             if (tok->indent &&
281                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
282             {
283                 tok->pendin = -tok->indent;
284                 tok->indent = 0;
285             }
286         }
287         else
288             started = 1;
289         len = (a != NULL && b != NULL) ? b - a : 0;
290         str = (char *) PyObject_MALLOC(len + 1);
291         if (str == NULL) {
292             err_ret->error = E_NOMEM;
293             break;
294         }
295         if (len > 0)
296             strncpy(str, a, len);
297         str[len] = '\0';
298 
299 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
300         if (type == NOTEQUAL) {
301             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
302                             strcmp(str, "!=")) {
303                 PyObject_FREE(str);
304                 err_ret->error = E_SYNTAX;
305                 break;
306             }
307             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
308                             strcmp(str, "<>")) {
309                 PyObject_FREE(str);
310                 err_ret->expected = NOTEQUAL;
311                 err_ret->error = E_SYNTAX;
312                 break;
313             }
314         }
315 #endif
316 
317         /* Nodes of type STRING, especially multi line strings
318            must be handled differently in order to get both
319            the starting line number and the column offset right.
320            (cf. issue 16806) */
321         lineno = type == STRING ? tok->first_lineno : tok->lineno;
322         line_start = type == STRING ? tok->multi_line_start : tok->line_start;
323         if (a != NULL && a >= line_start) {
324             col_offset = Py_SAFE_DOWNCAST(a - line_start,
325                                           intptr_t, int);
326         }
327         else {
328             col_offset = -1;
329         }
330 
331         if (b != NULL && b >= tok->line_start) {
332             end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
333                                               intptr_t, int);
334         }
335         else {
336             end_col_offset = -1;
337         }
338 
339         if (type == TYPE_IGNORE) {
340             if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
341                 err_ret->error = E_NOMEM;
342                 break;
343             }
344             continue;
345         }
346 
347         if ((err_ret->error =
348              PyParser_AddToken(ps, (int)type, str,
349                                lineno, col_offset, tok->lineno, end_col_offset,
350                                &(err_ret->expected))) != E_OK) {
351             if (err_ret->error != E_DONE) {
352                 PyObject_FREE(str);
353                 err_ret->token = type;
354             }
355             break;
356         }
357     }
358 
359     if (err_ret->error == E_DONE) {
360         n = ps->p_tree;
361         ps->p_tree = NULL;
362 
363         if (n->n_type == file_input) {
364             /* Put type_ignore nodes in the ENDMARKER of file_input. */
365             int num;
366             node *ch;
367             size_t i;
368 
369             num = NCH(n);
370             ch = CHILD(n, num - 1);
371             REQ(ch, ENDMARKER);
372 
373             for (i = 0; i < type_ignores.num_items; i++) {
374                 int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
375                                           type_ignores.items[i].lineno, 0,
376                                           type_ignores.items[i].lineno, 0);
377                 if (res != 0) {
378                     err_ret->error = res;
379                     PyNode_Free(n);
380                     n = NULL;
381                     break;
382                 }
383                 type_ignores.items[i].comment = NULL;
384             }
385         }
386 
387         /* Check that the source for a single input statement really
388            is a single statement by looking at what is left in the
389            buffer after parsing.  Trailing whitespace and comments
390            are OK.  */
391         if (err_ret->error == E_DONE && start == single_input) {
392             char *cur = tok->cur;
393             char c = *tok->cur;
394 
395             for (;;) {
396                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
397                     c = *++cur;
398 
399                 if (!c)
400                     break;
401 
402                 if (c != '#') {
403                     err_ret->error = E_BADSINGLE;
404                     PyNode_Free(n);
405                     n = NULL;
406                     break;
407                 }
408 
409                 /* Suck up comment. */
410                 while (c && c != '\n')
411                     c = *++cur;
412             }
413         }
414     }
415     else
416         n = NULL;
417 
418     growable_comment_array_deallocate(&type_ignores);
419 
420 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
421     *flags = ps->p_flags;
422 #endif
423     PyParser_Delete(ps);
424 
425     if (n == NULL) {
426         if (tok->done == E_EOF)
427             err_ret->error = E_EOF;
428         err_ret->lineno = tok->lineno;
429         if (tok->buf != NULL) {
430             size_t len;
431             assert(tok->cur - tok->buf < INT_MAX);
432             /* if we've managed to parse a token, point the offset to its start,
433              * else use the current reading position of the tokenizer
434              */
435             err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
436             len = tok->inp - tok->buf;
437             err_ret->text = (char *) PyObject_MALLOC(len + 1);
438             if (err_ret->text != NULL) {
439                 if (len > 0)
440                     strncpy(err_ret->text, tok->buf, len);
441                 err_ret->text[len] = '\0';
442             }
443         }
444     } else if (tok->encoding != NULL) {
445         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
446          * allocated using PyMem_
447          */
448         node* r = PyNode_New(encoding_decl);
449         if (r)
450             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
451         if (!r || !r->n_str) {
452             err_ret->error = E_NOMEM;
453             if (r)
454                 PyObject_FREE(r);
455             n = NULL;
456             goto done;
457         }
458         strcpy(r->n_str, tok->encoding);
459         PyMem_FREE(tok->encoding);
460         tok->encoding = NULL;
461         r->n_nchildren = 1;
462         r->n_child = n;
463         n = r;
464     }
465 
466 done:
467     PyTokenizer_Free(tok);
468 
469     if (n != NULL) {
470         _PyNode_FinalizeEndPos(n);
471     }
472     return n;
473 }
474 
475 static int
initerr(perrdetail * err_ret,PyObject * filename)476 initerr(perrdetail *err_ret, PyObject *filename)
477 {
478     err_ret->error = E_OK;
479     err_ret->lineno = 0;
480     err_ret->offset = 0;
481     err_ret->text = NULL;
482     err_ret->token = -1;
483     err_ret->expected = -1;
484     if (filename) {
485         Py_INCREF(filename);
486         err_ret->filename = filename;
487     }
488     else {
489         err_ret->filename = PyUnicode_FromString("<string>");
490         if (err_ret->filename == NULL) {
491             err_ret->error = E_ERROR;
492             return -1;
493         }
494     }
495     return 0;
496 }
497