1 
2 /* Parser-tokenizer link implementation */
3 
4 #include "../Include/pgenheaders.h"
5 #include "tokenizer.h"
6 #include "../Include/node.h"
7 #include "../Include/grammar.h"
8 #include "parser.h"
9 #include "../Include/parsetok.h"
10 #include "../Include/errcode.h"
11 #include "../Include/graminit.h"
12 
13 
14 /* Forward */
15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16 static int initerr(perrdetail *err_ret, PyObject * filename);
17 
18 /* Parse input coming from a string.  Return error code, print some errors. */
19 node *
Ta3Parser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)20 Ta3Parser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
21 {
22     return Ta3Parser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
23 }
24 
25 node *
Ta3Parser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)26 Ta3Parser_ParseStringFlags(const char *s, grammar *g, int start,
27                           perrdetail *err_ret, int flags)
28 {
29     return Ta3Parser_ParseStringFlagsFilename(s, NULL,
30                                              g, start, err_ret, flags);
31 }
32 
33 node *
Ta3Parser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)34 Ta3Parser_ParseStringFlagsFilename(const char *s, const char *filename,
35                           grammar *g, int start,
36                           perrdetail *err_ret, int flags)
37 {
38     int iflags = flags;
39     return Ta3Parser_ParseStringFlagsFilenameEx(s, filename, g, start,
40                                                err_ret, &iflags);
41 }
42 
43 node *
Ta3Parser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)44 Ta3Parser_ParseStringObject(const char *s, PyObject *filename,
45                            grammar *g, int start,
46                            perrdetail *err_ret, int *flags)
47 {
48     struct tok_state *tok;
49     int exec_input = start == file_input;
50 
51     if (initerr(err_ret, filename) < 0)
52         return NULL;
53 
54     if (*flags & PyPARSE_IGNORE_COOKIE)
55         tok = Ta3Tokenizer_FromUTF8(s, exec_input);
56     else
57         tok = Ta3Tokenizer_FromString(s, exec_input);
58     if (tok == NULL) {
59         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
60         return NULL;
61     }
62 
63 #ifndef PGEN
64     Py_INCREF(err_ret->filename);
65     tok->filename = err_ret->filename;
66 #endif
67     if (*flags & PyPARSE_ASYNC_ALWAYS)
68         tok->async_always = 1;
69     return parsetok(tok, g, start, err_ret, flags);
70 }
71 
72 node *
Ta3Parser_ParseStringFlagsFilenameEx(const char * s,const char * filename_str,grammar * g,int start,perrdetail * err_ret,int * flags)73 Ta3Parser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
74                           grammar *g, int start,
75                           perrdetail *err_ret, int *flags)
76 {
77     node *n;
78     PyObject *filename = NULL;
79 #ifndef PGEN
80     if (filename_str != NULL) {
81         filename = PyUnicode_DecodeFSDefault(filename_str);
82         if (filename == NULL) {
83             err_ret->error = E_ERROR;
84             return NULL;
85         }
86     }
87 #endif
88     n = Ta3Parser_ParseStringObject(s, filename, g, start, err_ret, flags);
89 #ifndef PGEN
90     Py_XDECREF(filename);
91 #endif
92     return n;
93 }
94 
95 /* Parse input coming from a file.  Return error code, print some errors. */
96 
97 node *
Ta3Parser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret)98 Ta3Parser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
99                    const char *ps1, const char *ps2,
100                    perrdetail *err_ret)
101 {
102     return Ta3Parser_ParseFileFlags(fp, filename, NULL,
103                                    g, start, ps1, ps2, err_ret, 0);
104 }
105 
106 node *
Ta3Parser_ParseFileFlags(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int flags)107 Ta3Parser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
108                         grammar *g, int start,
109                         const char *ps1, const char *ps2,
110                         perrdetail *err_ret, int flags)
111 {
112     int iflags = flags;
113     return Ta3Parser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
114                                      ps2, err_ret, &iflags);
115 }
116 
117 node *
Ta3Parser_ParseFileObject(FILE * fp,PyObject * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)118 Ta3Parser_ParseFileObject(FILE *fp, PyObject *filename,
119                          const char *enc, grammar *g, int start,
120                          const char *ps1, const char *ps2,
121                          perrdetail *err_ret, int *flags)
122 {
123     struct tok_state *tok;
124 
125     if (initerr(err_ret, filename) < 0)
126         return NULL;
127 
128     if ((tok = Ta3Tokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
129         err_ret->error = E_NOMEM;
130         return NULL;
131     }
132 #ifndef PGEN
133     Py_INCREF(err_ret->filename);
134     tok->filename = err_ret->filename;
135 #endif
136     return parsetok(tok, g, start, err_ret, flags);
137 }
138 
139 node *
Ta3Parser_ParseFileFlagsEx(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)140 Ta3Parser_ParseFileFlagsEx(FILE *fp, const char *filename,
141                           const char *enc, grammar *g, int start,
142                           const char *ps1, const char *ps2,
143                           perrdetail *err_ret, int *flags)
144 {
145     node *n;
146     PyObject *fileobj = NULL;
147 #ifndef PGEN
148     if (filename != NULL) {
149         fileobj = PyUnicode_DecodeFSDefault(filename);
150         if (fileobj == NULL) {
151             err_ret->error = E_ERROR;
152             return NULL;
153         }
154     }
155 #endif
156     n = Ta3Parser_ParseFileObject(fp, fileobj, enc, g,
157                                  start, ps1, ps2, err_ret, flags);
158 #ifndef PGEN
159     Py_XDECREF(fileobj);
160 #endif
161     return n;
162 }
163 
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Everything inside this "#if 0" is compiled out: two warning format
   strings (file name, line number) and the helper that prints them. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning `msg` to stderr, formatted with the file name and
   line number; a NULL `filename` is shown as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
181 
/* A growable array of (line number, comment text) pairs.  parsetok() uses
   it to collect TYPE_IGNORE tokens while parsing.

   The array stores the `comment` pointers it is given; it does not copy
   the text. */
typedef struct {
    struct {
        int lineno;
        char *comment;
    } *items;
    size_t size;        /* allocated capacity, in elements */
    size_t num_items;   /* number of elements currently in use */
} growable_comment_array;

/* Initialise `arr` with room for `initial_size` elements (must be > 0).

   Returns 1 on success and 0 if the allocation failed.  After a failure
   the array is left empty with zero capacity, so the recorded capacity
   never exceeds what was actually allocated. */
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = malloc(initial_size * sizeof(*arr->items));
    arr->size = (arr->items != NULL) ? initial_size : 0;
    arr->num_items = 0;

    return arr->items != NULL;
}

/* Append the pair (`lineno`, `comment`) to `arr`, doubling the capacity
   when the array is full.

   Returns 1 on success.  Returns 0 if the array could not be grown; in
   that case `arr` is left exactly as it was (same storage, same capacity,
   same contents) and `comment` has NOT been stored, so the caller still
   owns it. */
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        /* Largest element count whose byte size still fits in size_t. */
        const size_t max_items = (size_t)-1 / sizeof(*arr->items);
        size_t new_size;
        void *grown;

        /* Refuse to grow if doubling would overflow the byte count. */
        if (arr->size > max_items / 2) {
            return 0;
        }
        new_size = (arr->size != 0) ? arr->size * 2 : 1;

        /* Keep the old pointer until realloc() has succeeded: on failure
           realloc() leaves the original storage valid, and overwriting
           arr->items with NULL would leak it and leave num_items
           describing elements that are no longer reachable. */
        grown = realloc(arr->items, new_size * sizeof(*arr->items));
        if (grown == NULL) {
            return 0;
        }
        arr->items = grown;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;
    arr->num_items++;
    return 1;
}
216 
217 static void
growable_comment_array_deallocate(growable_comment_array * arr)218 growable_comment_array_deallocate(growable_comment_array *arr) {
219     unsigned i;
220     for (i = 0; i < arr->num_items; i++) {
221         PyObject_FREE(arr->items[i].comment);
222     }
223     free(arr->items);
224 }
225 
226 /* Parse input coming from the given tokenizer structure.
227    Return error code. */
228 
229 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)230 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
231          int *flags)
232 {
233     parser_state *ps;
234     node *n;
235     int started = 0;
236 
237     growable_comment_array type_ignores;
238     if (!growable_comment_array_init(&type_ignores, 10)) {
239         err_ret->error = E_NOMEM;
240         Ta3Tokenizer_Free(tok);
241         return NULL;
242     }
243 
244     if ((ps = Ta3Parser_New(g, start)) == NULL) {
245         err_ret->error = E_NOMEM;
246         Ta3Tokenizer_Free(tok);
247         return NULL;
248     }
249 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
250     if (*flags & PyPARSE_BARRY_AS_BDFL)
251         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
252 #endif
253 
254     for (;;) {
255         char *a, *b;
256         int type;
257         size_t len;
258         char *str;
259         int col_offset;
260 
261         type = Ta3Tokenizer_Get(tok, &a, &b);
262         if (type == ERRORTOKEN) {
263             err_ret->error = tok->done;
264             break;
265         }
266         if (type == ENDMARKER && started) {
267             type = NEWLINE; /* Add an extra newline */
268             started = 0;
269             /* Add the right number of dedent tokens,
270                except if a certain flag is given --
271                codeop.py uses this. */
272             if (tok->indent &&
273                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
274             {
275                 tok->pendin = -tok->indent;
276                 tok->indent = 0;
277             }
278         }
279         else
280             started = 1;
281         len = (a != NULL && b != NULL) ? b - a : 0;
282         str = (char *) PyObject_MALLOC(len + 1);
283         if (str == NULL) {
284             err_ret->error = E_NOMEM;
285             break;
286         }
287         if (len > 0)
288             strncpy(str, a, len);
289         str[len] = '\0';
290 
291 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
292         if (type == NOTEQUAL) {
293             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
294                             strcmp(str, "!=")) {
295                 PyObject_FREE(str);
296                 err_ret->error = E_SYNTAX;
297                 break;
298             }
299             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
300                             strcmp(str, "<>")) {
301                 PyObject_FREE(str);
302                 err_ret->expected = NOTEQUAL;
303                 err_ret->error = E_SYNTAX;
304                 break;
305             }
306         }
307 #endif
308         if (a != NULL && a >= tok->line_start) {
309             col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
310                                           intptr_t, int);
311         }
312         else {
313             col_offset = -1;
314         }
315 
316         if (type == TYPE_IGNORE) {
317             if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
318                 err_ret->error = E_NOMEM;
319                 break;
320             }
321             continue;
322         }
323 
324         if ((err_ret->error =
325              Ta3Parser_AddToken(ps, (int)type, str,
326                                tok->lineno, col_offset,
327                                &(err_ret->expected))) != E_OK) {
328             if (err_ret->error != E_DONE) {
329                 PyObject_FREE(str);
330                 err_ret->token = type;
331             }
332             break;
333         }
334     }
335 
336     if (err_ret->error == E_DONE) {
337         n = ps->p_tree;
338         ps->p_tree = NULL;
339 
340         if (n->n_type == file_input) {
341             /* Put type_ignore nodes in the ENDMARKER of file_input. */
342             int num;
343             node *ch;
344             size_t i;
345 
346             num = NCH(n);
347             ch = CHILD(n, num - 1);
348             REQ(ch, ENDMARKER);
349 
350             for (i = 0; i < type_ignores.num_items; i++) {
351                 int res = Ta3Node_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
352                                            type_ignores.items[i].lineno, 0);
353                 if (res != 0) {
354                     err_ret->error = res;
355                     Ta3Node_Free(n);
356                     n = NULL;
357                     break;
358                 }
359                 type_ignores.items[i].comment = NULL;
360             }
361         }
362 
363 #ifndef PGEN
364         /* Check that the source for a single input statement really
365            is a single statement by looking at what is left in the
366            buffer after parsing.  Trailing whitespace and comments
367            are OK.  */
368         if (err_ret->error == E_DONE && start == single_input) {
369             char *cur = tok->cur;
370             char c = *tok->cur;
371 
372             for (;;) {
373                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
374                     c = *++cur;
375 
376                 if (!c)
377                     break;
378 
379                 if (c != '#') {
380                     err_ret->error = E_BADSINGLE;
381                     Ta3Node_Free(n);
382                     n = NULL;
383                     break;
384                 }
385 
386                 /* Suck up comment. */
387                 while (c && c != '\n')
388                     c = *++cur;
389             }
390         }
391 #endif
392     }
393     else
394         n = NULL;
395 
396     growable_comment_array_deallocate(&type_ignores);
397 
398 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
399     *flags = ps->p_flags;
400 #endif
401     Ta3Parser_Delete(ps);
402 
403     if (n == NULL) {
404         if (tok->done == E_EOF)
405             err_ret->error = E_EOF;
406         err_ret->lineno = tok->lineno;
407         if (tok->buf != NULL) {
408             size_t len;
409             assert(tok->cur - tok->buf < INT_MAX);
410             err_ret->offset = (int)(tok->cur - tok->buf);
411             len = tok->inp - tok->buf;
412             err_ret->text = (char *) PyObject_MALLOC(len + 1);
413             if (err_ret->text != NULL) {
414                 if (len > 0)
415                     strncpy(err_ret->text, tok->buf, len);
416                 err_ret->text[len] = '\0';
417             }
418         }
419     } else if (tok->encoding != NULL) {
420         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
421          * allocated using PyMem_
422          */
423         node* r = Ta3Node_New(encoding_decl);
424         if (r)
425             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
426         if (!r || !r->n_str) {
427             err_ret->error = E_NOMEM;
428             if (r)
429                 PyObject_FREE(r);
430             n = NULL;
431             goto done;
432         }
433         strcpy(r->n_str, tok->encoding);
434         PyMem_FREE(tok->encoding);
435         tok->encoding = NULL;
436         r->n_nchildren = 1;
437         r->n_child = n;
438         n = r;
439     }
440 
441 done:
442     Ta3Tokenizer_Free(tok);
443 
444     return n;
445 }
446 
447 static int
initerr(perrdetail * err_ret,PyObject * filename)448 initerr(perrdetail *err_ret, PyObject *filename)
449 {
450     err_ret->error = E_OK;
451     err_ret->lineno = 0;
452     err_ret->offset = 0;
453     err_ret->text = NULL;
454     err_ret->token = -1;
455     err_ret->expected = -1;
456 #ifndef PGEN
457     if (filename) {
458         Py_INCREF(filename);
459         err_ret->filename = filename;
460     }
461     else {
462         err_ret->filename = PyUnicode_FromString("<string>");
463         if (err_ret->filename == NULL) {
464             err_ret->error = E_ERROR;
465             return -1;
466         }
467     }
468 #endif
469     return 0;
470 }
471